Setup

library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
source("/home/guestA/n70275b/work/rscripts/geomNorm.R")

# Helper function
#ggpoints <- function(x,...) 
#  ggplot(x,...) + geom_point(size=3,stroke=1) +
#  ggrepel::geom_text_repel(size=4) + theme_minimal() + mycolor

## ラベルあり
ggpoints <- function(x,...) 
  ggplot(x,...) + geom_point(stroke=1) +
  ggrepel::geom_text_repel(size=4) + theme_minimal() + mycolor

## ラベルなし
#ggpoints <- function(x,...) 
#  ggplot(x,...) + geom_point(stroke=1) + theme_minimal() + mycolor

print(sessionInfo(),locale=FALSE)
R version 4.0.1 (2020-06-06)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux

Matrix products: default
BLAS/LAPACK: /usr/local/intel2018_up1/compilers_and_libraries_2018.0.128/linux/mkl/lib/intel64_lin/libmkl_intel_lp64.so

attached base packages:
 [1] grid      stats4    parallel  stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] stringr_1.4.0                             hrbrthemes_0.8.0                          ggrepel_0.8.2                             ggpubr_0.4.0.999                         
 [5] gplots_3.0.4                              DESeq2_1.28.1                             GGally_2.0.0                              vcd_1.4-7                                
 [9] BiocParallel_1.22.0                       Matrix_1.2-18                             SummarizedExperiment_1.18.2               DelayedArray_0.14.1                      
[13] matrixStats_0.56.0                        motifmatchr_1.10.0                        org.Mm.eg.db_3.11.4                       TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
[17] org.Hs.eg.db_3.11.4                       TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2   GenomicFeatures_1.40.1                    AnnotationDbi_1.50.3                     
[21] Biobase_2.48.0                            ChIPseeker_1.24.0                         clusterProfiler_3.16.0                    BSgenome.Mmusculus.UCSC.mm10_1.4.0       
[25] ggsignif_0.6.0                            chromVAR_1.10.0                           purrr_0.3.4                               RColorBrewer_1.1-2                       
[29] ggsci_2.9                                 readr_1.3.1                               tidyr_1.1.1                               dplyr_1.0.1                              
[33] ggplot2_3.3.2                             TFBSTools_1.26.0                          BSgenome_1.56.0                           rtracklayer_1.48.0                       
[37] Biostrings_2.56.0                         XVector_0.28.0                            GenomicRanges_1.40.0                      GenomeInfoDb_1.24.2                      
[41] IRanges_2.22.2                            S4Vectors_0.26.1                          BiocGenerics_0.34.0                      

loaded via a namespace (and not attached):
  [1] rappdirs_0.3.1              R.methodsS3_1.8.0           bit64_4.0.2                 knitr_1.29                  irlba_2.3.3                 R.utils_2.9.2              
  [7] data.table_1.13.0           KEGGREST_1.28.0             RCurl_1.98-1.2              generics_0.0.2              snow_0.4-3                  cowplot_1.0.0              
 [13] lambda.r_1.2.4              RSQLite_2.2.0               europepmc_0.4               bit_4.0.4                   enrichplot_1.8.1            xml2_1.3.2                 
 [19] httpuv_1.5.4                isoband_0.2.2               assertthat_0.2.1            DirichletMultinomial_1.30.0 viridis_0.5.1               xfun_0.16                  
 [25] hms_0.5.3                   evaluate_0.14               promises_1.1.1              fansi_0.4.1                 progress_1.2.2              caTools_1.18.0             
 [31] dbplyr_1.4.4                readxl_1.3.1                igraph_1.2.5                DBI_1.1.0                   geneplotter_1.66.0          htmlwidgets_1.5.1          
 [37] futile.logger_1.4.3         reshape_0.8.8               ellipsis_0.3.1              backports_1.1.8             annotate_1.66.0             biomaRt_2.44.1             
 [43] vctrs_0.3.2                 abind_1.4-5                 withr_2.2.0                 ggforce_0.3.2               triebeard_0.3.0             GenomicAlignments_1.24.0   
 [49] prettyunits_1.1.1           DOSE_3.14.0                 lazyeval_0.2.2              seqLogo_1.54.3              crayon_1.3.4                genefilter_1.70.0          
 [55] labeling_0.3                pkgconfig_2.0.3             tweenr_1.0.1                nlme_3.1-148                rlang_0.4.7                 lifecycle_0.2.0            
 [61] miniUI_0.1.1.1              downloader_0.4              extrafontdb_1.0             BiocFileCache_1.12.1        cellranger_1.1.0            polyclip_1.10-0            
 [67] lmtest_0.9-37               urltools_1.7.3              carData_3.0-4               boot_1.3-25                 zoo_1.8-8                   base64enc_0.1-3            
 [73] pheatmap_1.0.12             ggridges_0.5.2              png_0.1-7                   viridisLite_0.3.0           bitops_1.0-6                R.oo_1.23.0                
 [79] KernSmooth_2.23-17          blob_1.2.1                  qvalue_2.20.0               rstatix_0.6.0               gridGraphics_0.5-0          CNEr_1.24.0                
 [85] scales_1.1.1                memoise_1.1.0               magrittr_1.5                plyr_1.8.6                  gdata_2.18.0                zlibbioc_1.34.0            
 [91] compiler_4.0.1              scatterpie_0.1.4            plotrix_3.7-8               Rsamtools_2.4.0             cli_2.0.2                   formatR_1.7                
 [97] mgcv_1.8-31                 MASS_7.3-51.6               tidyselect_1.1.0            stringi_1.4.6               forcats_0.5.0               yaml_2.2.1                 
[103] GOSemSim_2.14.1             askpass_1.1                 locfit_1.5-9.4              fastmatch_1.1-0             tools_4.0.1                 rio_0.5.16                 
[109] rstudioapi_0.11             TFMPvalue_0.0.8             foreign_0.8-80              gridExtra_2.3               farver_2.0.3                ggraph_2.0.3               
[115] digest_0.6.25               rvcheck_0.1.8               BiocManager_1.30.10         FNN_1.1.3                   shiny_1.5.0                 pracma_2.2.9               
[121] Rcpp_1.0.5                  car_3.0-8                   broom_0.7.0                 later_1.1.0.1               gdtools_0.2.2               httr_1.4.2                 
[127] colorspace_1.4-1            XML_3.99-0.5                splines_4.0.1               uwot_0.1.8                  graphlayouts_0.7.0          ggplotify_0.0.5            
[133] systemfonts_0.2.3           plotly_4.9.2.1              xtable_1.8-4                jsonlite_1.7.0              futile.options_1.0.1        poweRlaw_0.70.6            
[139] tidygraph_1.2.0             R6_2.4.1                    pillar_1.4.6                htmltools_0.5.0             mime_0.9                    glue_1.4.1                 
[145] fastmap_1.0.1               DT_0.15                     fgsea_1.14.0                utf8_1.1.4                  lattice_0.20-41             tibble_3.0.3               
[151] curl_4.3                    gtools_3.8.2                Rttf2pt1_1.3.8              zip_2.1.0                   GO.db_3.11.4                openxlsx_4.1.5             
[157] openssl_1.4.2               survival_3.2-3              rmarkdown_2.3               munsell_0.5.0               DO.db_2.9                   GenomeInfoDbData_1.2.3     
[163] msigdbr_7.1.1               haven_2.3.1                 reshape2_1.4.4              gtable_0.3.0                extrafont_0.17             
select <- dplyr::select
count <- dplyr::count
rename <- dplyr::rename

Parameters

modify here

# Files


#deftable <- "~/akuwakado/kuwakado/BRBSeq/H3mm18_Dox_0432lane2/Final_Rserver_191203/deftable_BRB_noumi_new_190520_fin191205ver.txt" #Umi補正なし (BRB)

deftable <- "/home/guestA/o70578a/akuwakado/kuwakado/BRBSeq/H3mm18_Dox_0432lane2/Final_Last_Rserver_200811/deftable_BRB_noumi_new_190520_Last20200811ver.txt"

#deftable <- "~/akuwakado/kuwakado/BRBSeq/H3mm18_Dox_0432lane2/Final_Rserver_191203/deftable_BRB_noumi_new_190520_fin191205ver.txt" #Umi補正なし (BRB)


#deftable <- "deftable_BRB_noumi_new_190520.txt" #Umi補正なし (BRB)

#deftable <- "~/akuwakado/kuwakado/BRBSeq/H3mm18_Dox_0432lane2/deftable_BRB_noumi_new_190520.txt" #Umi補正なし (BRB)


## Data selection (filter rows of deftable)
#use <- quo(!grepl("^18",group) & (group != "Nc-minusTryd"))
#use <- quo(TRUE) # use all
use <- quo(type != "C2C12")

# Species specific parameters
species <- "Mus musculus"
biomartann <- "mmusculus_gene_ensembl"
maxchrom <- 19 # 19: mouse, 22: human


# Graphics
# aesthetic mapping of labels
#myaes <- aes(colour=enzyme,shape=leg,label=rep) 
#myaes <- aes(colour=growth,shape=type,size=count) #ラベルなし
#myaes <- aes(colour=growth,shape=type,label=replicate,size=count) #ラベルあり
#myaes <- aes(colour=enzyme,shape=leg,label=replicate) #ラベルあり
#myaes <- aes(colour=enzyme,shape=leg,label=factor(rep))
#myaes <- aes(colour=type, shape=revcro, label=read, size=count)
#myaes <- aes(colour=type, shape=revcro, label=read)

#myaes <- aes(colour=growth,shape=type,label=replicate,size=count) #ラベルあり
#myaes <- aes(colour=time,shape=type,size=count,label=replicate)
#myaes <- aes(colour=WT_KO_intact_CTX, shape=Day,size=count,label=f_m)

#myaes <- aes(colour=WT_KO_intact_CTX, shape=Day, label=f_m) #サイズを変えず
#myaes <- aes(colour=growth,shape=type,label=replicate,size=count) #ラベルあり
myaes <- aes(colour=time,shape=type,label=rep,size=count) #ラベルあり
myaes2 <- aes(colour=time,shape=type) #kuwa add

# color palette of points: See vignette("ggsci")
mycolor <- ggsci::scale_color_aaas()



# PCA/UMAP
scalerows <- TRUE # gene-wise scaling (pattern is the matter?)
ntop <- 500 # number of top-n genes with high variance
seed <- 123 # set another number if UMAP looks not good
n_nei <- 6  # number of neighboring data points in UMAP #ここをどうしたらいい?


# DESeq2
#model <- ~groupn+lead #dateも追加
#model <- ~leg + enzyme + leg:enzyme
#model <- ~type+growth#+type:growth
#model <- ~group+lead


#model <- ~group
#model <- ~type+growth+type:growth #これでは相互作用が入っていない
#model <- ~type+growth #これでは相互作用が入っていない


model <- ~group
#model <- ~type+growth+growth:type

fdr <- 0.1 # acceptable false discovery rate
lfcthreth <- log2(1) # threshold in abs(log2FC)

# controls should be placed in the right
contrast <- list(
  
  group_UI_Doxplus_vs_minus = c("group", "BRB_UI_DoxPlus", "BRB_UI_DoxMinus"),
  group_0h_Doxplus_vs_minus = c("group", "BRB_0h_DoxPlus", "BRB_0h_DoxMinus"),
  group_24h_Doxplus_vs_minus = c("group", "BRB_24h_DoxPlus", "BRB_24h_DoxMinus"),
  group_48h_Doxplus_vs_minus = c("group", "BRB_48h_DoxPlus", "BRB_48h_DoxMinus")
  
  
  #group_UI_Doxplus_vs_minus = c("group", "Doxplus_UI", "Doxminus_UI"),
  #group_Diff0h_Doxplus_vs_minus = c("group", "Doxplus_Diff0h", "Doxminus_Diff0h"),
  #group_Diff24h_Doxplus_vs_minus = c("group", "Doxplus_Diff24h", "Doxminus_Diff24h"),
  #group_Diff48h_Doxplus_vs_minus = c("group", "Doxplus_Diff48h", "Doxminus_Diff48h")
  
  
  #Intercept = list("Intercept"), # reference level
  #leg_LvsR = c("leg", "L", "R"),
  #enz_KvsC = c("enzyme","K","C")
  #legL.enzK = list("legL.enzymeK") # interaction
  
  #type_Doxplus_vs_minus = c("type", "Doxplus", "Doxminus")
)

Retrieve Biomart

if(!exists("e2g")){
  #ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",host="asia.ensembl.org")
  #ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",host="uswest.ensembl.org")
  ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",host="useast.ensembl.org")
  mart <- biomaRt::useDataset(biomartann,mart=ensembl)
  e2g <- biomaRt::getBM(attributes=c("ensembl_gene_id","external_gene_name",
    "gene_biotype","chromosome_name"), mart=mart) %>% as_tibble %>%
  rename(
    ens_gene = ensembl_gene_id,
    ext_gene = external_gene_name,
    biotype = gene_biotype,
    chr = chromosome_name
  )
}
annotate <- partial(right_join,e2g,by="ens_gene")

#-----#
nrow(e2g)
[1] 56305
#readr::write_csv(e2g,"ensemble_list_asia.csv")
#readr::write_csv(e2g,"ensemble_list_uswest.csv")
readr::write_csv(e2g,"ensemble_list_useast.csv")

Load counts

def <- readr::read_tsv(deftable) %>% filter(!!use)
Parsed with column specification:
cols(
  file = col_character(),
  sample0 = col_character(),
  barcode = col_character(),
  growth = col_character(),
  sample = col_character(),
  group = col_character(),
  time = col_character(),
  type = col_character(),
  seq = col_character(),
  rep = col_double()
)
print(def)

#def$growth <- factor(def$growth,levels =c("UI","Diff0h","Diff24h","Diff48h"))
#def$type <- factor(def$type,levels =c("Doxminus","Doxplus"))

#factor(def$growth,levels =c("UI","Diff0h","Diff24h","Diff48h"))
# [1] UI      UI      UI      UI      UI      UI      UI      UI      Diff0h  Diff0h  Diff0h  Diff0h  Diff0h  Diff0h  Diff0h  Diff0h  Diff24h Diff24h Diff24h Diff24h
#[21] Diff24h Diff24h Diff24h Diff24h Diff48h Diff48h Diff48h Diff48h Diff48h Diff48h Diff48h Diff48h
#Levels: UI Diff0h Diff24h Diff48h


####--- New ---#### (no UMI ?)
# Set reference levels according to the contrast
for(x in keep(contrast,is.character))
  def[[x[1]]] <- relevel(factor(def[[x[1]]]),x[3])

umi <- def$file %>% unique %>% tibble(file=.) %>% 
  dplyr::mutate(data=map(file,readr::read_tsv,progress=FALSE)) %>%
  unnest() %>% dplyr::rename(barcode=cell) %>%
  dplyr::inner_join(select(def,file,barcode,sample),.,c("file","barcode")) %>%
  select(-file,-barcode) %>% dplyr::rename(ens_gene=gene)
Parsed with column specification:
cols(
  gene = col_character(),
  cell = col_character(),
  count = col_double()
)
`cols` is now required when using unnest().
Please use `cols = c(data)`
print(umi)

## sample, barcode, file を忘れずに!

mat <- umi %>% annotate %>%
  dplyr::mutate(chr=factor(chr,c(1:maxchrom,"X","Y","MT"))) %>%
  filter(!is.na(chr)) %>% spread(sample,count,fill=0)

## to check read vias, this add read number as "n" column (2019/4/19)
def <- umi %>% count(sample,wt=count) %>% dplyr::inner_join(def,.) %>% dplyr::rename(count=n)
Joining, by = "sample"
####-----------#### 




# Set reference levels according to the contrast
#for(x in keep(contrast,is.character))
#  def[[x[1]]] <- relevel(factor(def[[x[1]]]),x[3])

#umi <- def$file %>% unique %>% tibble(file=.) %>% 
#  mutate(data=map(file,readr::read_tsv,progress=FALSE)) %>%
#  unnest() %>% dplyr::rename(barcode=cell) %>%
#  inner_join(select(def,file,barcode,sample),.,c("file","barcode")) %>%
#  select(-file,-barcode) %>% dplyr::rename(ens_gene=gene)

#mat <- umi %>% annotate %>%
#  mutate(chr=factor(chr,c(1:maxchrom,"X","Y","MT"))) %>%
#  filter(!is.na(chr)) %>% spread(sample,count,fill=0)

print(mat)

## to check read vias, this add read number as "n" column (2019/4/19)
#def <- umi %>% count(sample,wt=count) %>% inner_join(def,.) %>% dplyr::rename(count=n)

print(def)


##====================================##
# 確認 (20191204) 2つの値は一緒か?
# 生のデータカウント中の遺伝子総数

umi %>% group_by(ens_gene) %>% summarize %>% nrow()
`summarise()` ungrouping output (override with `.groups` argument)
[1] 21871
umi %>% spread(sample,count,fill=0) %>% nrow()
[1] 21871
mat %>% nrow()
[1] 21742
mat %>% filter(chr!="MT") %>% nrow() # MTなし
[1] 21707
# matでは、chr等が不明なものは省いている。
# DEGでは、さらにMTも省いている。
##====================================##

Reads breakdown

Total reads

bychr <- mat %>% select(-(1:3)) %>%
  gather("sample","count",-chr) %>%
  group_by(chr,sample) %>% summarise(total=sum(count)) %>% ungroup
`summarise()` regrouping output by 'chr' (override with `.groups` argument)
ggplot(bychr,aes(reorder(sample,dplyr::desc(sample)),total/1e6,fill=chr)) +
  theme_linedraw() + geom_bar(stat="identity") + coord_flip() +
  xlab("sample") + ylab("million reads") + ggsci::scale_fill_igv() +
  scale_x_discrete(limits = rev(levels(sample)))

NA
NA

Biotype

bt <- mat %>% select(-c(1,2,4)) %>% group_by(biotype) %>%
  summarise_all(sum) %>% filter_at(-1,any_vars(. > 1000))
bt %>% tibble::column_to_rownames("biotype") %>%
  as.matrix %>% t %>% mosaicplot(las=2,shade=TRUE)

Correlations

drop rows with all 0 -> +1/2 -> geom.scale -> log -> Pearson’s

matf <- mat %>% filter(chr!="MT") %>% filter_at(-(1:4),any_vars(. > 0))
X <- matf %>% select(-(1:4)) %>% as.matrix
rownames(X) <- matf$ens_gene
lX <- log(gscale(X+0.5))
R <- cor(lX); diag(R) <- NA
pheatmap::pheatmap(R,color=viridis::viridis(256))

Dimension reduction

# set scale=TRUE if the patterns (not level) is the matter
p <- prcomp(t(lX[rank(-apply(lX,1,var)) <= ntop,]),scale=scalerows,center=TRUE)
screeplot(p,las=2,main="Importance")

print(summary(p)$imp[,seq(min(10,ncol(X)))])
                            PC1      PC2      PC3      PC4      PC5      PC6      PC7      PC8      PC9     PC10
Standard deviation     15.29237 6.164198 4.042076 3.825442 3.469872 3.401714 3.278207 3.191635 3.138257 3.038142
Proportion of Variance  0.46771 0.075990 0.032680 0.029270 0.024080 0.023140 0.021490 0.020370 0.019700 0.018460
Cumulative Proportion   0.46771 0.543710 0.576380 0.605650 0.629730 0.652880 0.674370 0.694740 0.714440 0.732900
label <- def %>% filter(sample %in% colnames(X))
df <- data.frame(p$x) %>% as_tibble(rownames="sample") %>%
  inner_join(label,.) %>% select(-file)
Joining, by = "sample"
print(df)
ggpoints(df,modifyList(aes(PC1,PC2),myaes))

set.seed(seed)
um <- uwot::umap(p$x,n_nei,2)
df <- as_tibble(um) %>% rename(UMAP1=V1,UMAP2=V2) %>% bind_cols(df)
ggpoints(df,modifyList(aes(UMAP1,UMAP2),myaes))


print(df)

##  kuwakado 変更 ##
ggpoints <- function(x,...) 
  ggplot(x,...) + geom_point(stroke=1) + theme_minimal() + mycolor

#ggpoints(df,modifyList(aes(PC1,PC2),myaes2))
#set.seed(seed)
#um <- uwot::umap(p$x,n_nei,2)
#df <- as_tibble(um) %>% rename(UMAP1=V1,UMAP2=V2) %>% bind_cols(df)
#ggpoints(df,modifyList(aes(UMAP1,UMAP2),myaes2))
## ## ## ##

DESeq2

Fit model

dds <- DESeq2::DESeqDataSetFromMatrix(X[,label$sample],label,model)
converting counts to integer mode
dds <- DESeq2::DESeq(dds)
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
-- note: fitType='parametric', but the dispersion trend was not well captured by the
   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.
final dispersion estimates
fitting model and testing
#=====#

dds <- DESeq2::estimateSizeFactors(dds)
norm <- DESeq2::counts(dds,normalized=TRUE) #DEGを取った後のクラスタリングに使う。

normalizedcount <- as.data.frame(norm) %>% tibble::rownames_to_column("ens_gene") %>% as_tibble
readr::write_csv(normalizedcount, "./H3mm18KO_3T3_Dox_normCount.csv")

normalizedcount %>% inner_join(e2g, by = "ens_gene")  %>% dplyr::select("ens_gene","ext_gene", "biotype","chr", all_of(label$sample)) %>% readr::write_csv("./H3mm18KO_3T3_Dox_normCount_genename.csv")

#count_dds <- estimateSizeFactors(dds)
#counts(count_dds, normalized=TRUE)

####--- + size factors を書き出し ------------------####
as.data.frame(DESeq2::sizeFactors(dds))  %>% tibble::rownames_to_column("sample") %>% readr::write_csv("./H3mm18KO_3T3_Dox__sizefactors.csv")

vst => z score


vsd <- DESeq2::vst(dds) #normalized countが入っている。(vstかrlog)
Xd <- SummarizedExperiment::assay(vsd) # 全て選択(200326) 20190920を元に (191024)
Xs <- Xd %>% t %>% scale %>% t

zscore <- Xs %>% as.data.frame() %>% tibble::rownames_to_column("ens_gene") %>% as_tibble
zscore_type <- zscore  %>% annotate %>% dplyr::select("ens_gene","ext_gene", "biotype","chr", all_of(label$sample))



readr::write_csv(zscore, "H3mm18KO_3T3_Dox__zscore_all.csv")
readr::write_csv(zscore_type, "H3mm18KO_3T3_Dox__zscore_type_all.csv")

nrow(zscore_type)
[1] 21707

Diagnostics plot

DESeq2::sizeFactors(dds) %>%
  {tibble(sample=names(.),sizeFactor=.)} %>%
  ggplot(aes(sample,sizeFactor)) + theme_minimal() +
  geom_bar(stat="identity") + coord_flip()

DESeq2::plotDispEsts(dds)

Extract results

191205修正

res <- mapply(function(x)
  DESeq2::results(dds,x,lfcThreshold=lfcthreth,alpha=fdr)
,contrast)

print(fdr)
[1] 0.1
# 200811修正
re_all <- map(res,as_tibble,rownames="ens_gene") %>%
  tibble(aspect=factor(names(.),names(.)),data=.) %>%
  mutate(data=map(data,annotate)) %>%
  unnest(cols = "data")

re <- re_all %>% filter(padj<fdr) #191120修正 unnest() 


#re <- map(res,as_tibble,rownames="ens_gene") %>%
#  tibble(aspect=factor(names(.),names(.)),data=.) %>%
#  mutate(data=map(data,annotate)) %>%
#  unnest(cols = "data") %>% filter(padj<fdr) #191120修正 unnest() 

fc <- re %>% select(1:7) %>% spread(aspect,log2FoldChange,fill=0)

imap(res,~{
  cat(paste0("-- ",.y," --"))
  DESeq2::summary(.x) #191120修正 DESeq2::summary.DESeqResults(.x)
}) %>% invisible
-- group_UI_Doxplus_vs_minus --
out of 21707 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 3, 0.014%
LFC < 0 (down)     : 0, 0%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

-- group_0h_Doxplus_vs_minus --
out of 21707 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 34, 0.16%
LFC < 0 (down)     : 40, 0.18%
outliers [1]       : 0, 0%
low counts [2]     : 18938, 87%
(mean count < 41)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

-- group_24h_Doxplus_vs_minus --
out of 21707 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 39, 0.18%
LFC < 0 (down)     : 51, 0.23%
outliers [1]       : 0, 0%
low counts [2]     : 18096, 83%
(mean count < 31)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

-- group_48h_Doxplus_vs_minus --
out of 21707 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 42, 0.19%
LFC < 0 (down)     : 54, 0.25%
outliers [1]       : 0, 0%
low counts [2]     : 17255, 79%
(mean count < 24)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

GSEA

msig <- msigdbr::msigdbr(species)
fgsea_msig <- partial(fgsea::fgsea,with(msig,split(gene_symbol,gs_name)))
gscat <- msig %>% select(gs_name,gs_cat,gs_subcat) %>%
  distinct() %>% rename(pathway=gs_name)

#gsea <- re %>% filter(aspect!="Intercept") %>% group_by(aspect) %>%
#  summarise(l2fc=list(setNames(log2FoldChange,ext_gene))) %>%
#  mutate(gse=map(l2fc,fgsea_msig,nperm=10000,maxSize=500)) %>%
#  select(-l2fc) %>% unnest %>% arrange(-NES) %>% right_join(gscat,.) %>%
#  mutate(leadingEdge=map_chr(leadingEdge,paste,collapse=","))

# gsea 修正ver [20190621]
#gsea <- re %>% filter(aspect!="Intercept") %>% group_by(aspect) %>%
#  summarise(l2fc=list(setNames(log2FoldChange,ext_gene))) %>%
#  mutate(gse=map(l2fc,fgsea_msig,nperm=10000,maxSize=500)) %>%
#  select(-l2fc) %>% unnest %>% arrange(-NES) %>% right_join(gscat,.) %>%
#  mutate(leadingEdge=map_chr(leadingEdge,paste,collapse=",")) %>%
#  group_by(aspect,gs_cat,gs_subcat) %>%
#  mutate(padj=p.adjust(pval,"BH")) %>% ungroup()

# gsea 修正ver [20190627]
gsea <- re %>% filter(aspect!="Intercept") %>% group_by(aspect) %>%
  summarise(l2fc=list(setNames(log2FoldChange,ext_gene))) %>%
  filter(map(l2fc,length)>10) %>%
  mutate(gse=map(l2fc,fgsea_msig,nperm=10000,maxSize=500)) %>%
  select(-l2fc) %>% unnest %>% arrange(-NES) %>% right_join(gscat,.) %>%
  mutate(leadingEdge=map_chr(leadingEdge,paste,collapse=",")) %>%
  group_by(aspect,gs_cat,gs_subcat) %>%
  mutate(padj=p.adjust(pval,"BH")) %>% ungroup()
`summarise()` ungrouping output (override with `.groups` argument)
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
 警告メッセージ: 
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 警告メッセージ: 
 警告メッセージ: 
 base::options(global_options) で:  base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled

  警告メッセージ: 
 'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で:  base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled

  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
 警告メッセージ: 
 警告メッセージ: 
 base::options(global_options) で:  base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled

  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で:  警告メッセージ: 

  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
WARNING: ignoring environment value of R_HOME
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 警告メッセージ: 
 base::options(global_options) で:  base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled

  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 警告メッセージ: 
 base::options(global_options) で:  base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled

  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
 警告メッセージ: 
 base::options(global_options) で: 
  'options(stringsAsFactors = TRUE)' is deprecated and will be disabled
Joining, by = "pathway"
hallmark <- gsea %>% filter(gs_cat=="H") %>%
  mutate(pathway=sub("^HALLMARK_","",pathway)) %>% 
  group_by(aspect) %>% nest %>%
  mutate(plt=map2(data,aspect,~
    ggplot(.x,aes(reorder(pathway,NES),NES,fill=padj<0.1)) +
    ggtitle(.y) + xlab("Hallmark gene sets") +
    geom_bar(stat="identity") + theme_minimal() + coord_flip() +
    theme(legend.position = "none") + ggsci::scale_fill_aaas())
  )
print(hallmark$plt)
[[1]]

[[2]]

[[3]]

See MSigDB Collections: http://software.broadinstitute.org/gsea/msigdb/collections.jsp

Write-out tables


#csvfilepath <- "BRB0432lane2noumi_H3mm18_Dox"

if(exists("fc"))   readr::write_csv(fc,"./2gun/BRB0432lane2noumi_H3mm18_Dox_l2fc_fdr0p1__final191205_last200811.csv")
if(exists("re"))   readr::write_csv(re,"./2gun/BRB0432lane2noumi_H3mm18_Dox_results_fdr0p1__final191205_last200811.csv")
if(exists("re_all"))   readr::write_csv(re_all,"./2gun/BRB0432lane2noumi_H3mm18_Dox_resultsall_fdr0p1__final191205_last200811.csv")
if(exists("gsea")) readr::write_csv(gsea,"./2gun/BRB0432lane2noumi_H3mm18_Dox_gsea_fdr0p1__final191205_last200811.csv")

Write-out tables Hallmark

#gseaのHallmarkのみ書き出し
hallmark_gsea <- gsea %>% filter(gs_cat=="H") %>% mutate(pathway=sub("^HALLMARK_","",pathway)) %>% group_by(aspect)
if(exists("hallmark_gsea")) readr::write_csv(hallmark_gsea,"./2gun/BRB0432lane2noumi_H3mm18_Dox_hallmark_gsea_fdr0p1__final191205_last200811.csv")

MAplot

5*7 inch


#maplot <- DESeq2::plotMA(res$group_UI_Doxplus_vs_minus, ylim=c(-2,2))
#print(maplot)
maplot <- DESeq2::plotMA(res$group_UI_Doxplus_vs_minus, ylim=c(-4,4), main="UI_Doxplus_vs_minus")

print(maplot)
NULL
#maplot <- DESeq2::plotMA(res$group_Diff0h_Doxplus_vs_minus , ylim=c(-2,2))
#print(maplot)
maplot <- DESeq2::plotMA(res$group_0h_Doxplus_vs_minus, ylim=c(-4,4), main="0h_Doxplus_vs_minus")

print(maplot)
NULL
#maplot <- DESeq2::plotMA(res$group_Diff24h_Doxplus_vs_minus, ylim=c(-2,2))
#print(maplot)
maplot <- DESeq2::plotMA(res$group_24h_Doxplus_vs_minus, ylim=c(-4,4), main="24h_Doxplus_vs_minus")

print(maplot)
NULL
#maplot <- DESeq2::plotMA(res$group_Diff48h_Doxplus_vs_minus, ylim=c(-2,2))
#print(maplot)
maplot <- DESeq2::plotMA(res$group_48h_Doxplus_vs_minus, ylim=c(-4,4), main="48h_Doxplus_vs_minus")

print(maplot)
NULL

尤度比検定

###Fit model LRT (BRBと同じ設定) ATACのフォーマットを持ってきた

def_list_select <- def %>% mutate(time=factor(time, c("UI","0h","24h","48h"))) %>% mutate(type=factor(type, c("DoxMinus","DoxPlus")))

#def_list_select <- def %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxminus","Doxplus")))

#def_list_select <- def

dds0 <- 0
dds1_2 <- 0
res1_2 <- 0


model_full <- ~growth*type # BRB
model_reduced <- ~growth # BRB

#model_full <- ~time*type # full model  (~growth*type # BRB)
#model_reduced <- ~time # reduced model (~growth # BRB)

colnames(X) #使用するサンプル
 [1] "BRB_0h_DoxMinus_1"  "BRB_0h_DoxMinus_2"  "BRB_0h_DoxMinus_3"  "BRB_0h_DoxMinus_4"  "BRB_0h_DoxPlus_1"   "BRB_0h_DoxPlus_2"   "BRB_0h_DoxPlus_3"   "BRB_0h_DoxPlus_4"  
 [9] "BRB_24h_DoxMinus_1" "BRB_24h_DoxMinus_2" "BRB_24h_DoxMinus_3" "BRB_24h_DoxMinus_4" "BRB_24h_DoxPlus_1"  "BRB_24h_DoxPlus_2"  "BRB_24h_DoxPlus_3"  "BRB_24h_DoxPlus_4" 
[17] "BRB_48h_DoxMinus_1" "BRB_48h_DoxMinus_2" "BRB_48h_DoxMinus_3" "BRB_48h_DoxMinus_4" "BRB_48h_DoxPlus_1"  "BRB_48h_DoxPlus_2"  "BRB_48h_DoxPlus_3"  "BRB_48h_DoxPlus_4" 
[25] "BRB_UI_DoxMinus_1"  "BRB_UI_DoxMinus_2"  "BRB_UI_DoxMinus_3"  "BRB_UI_DoxMinus_4"  "BRB_UI_DoxPlus_1"   "BRB_UI_DoxPlus_2"   "BRB_UI_DoxPlus_3"   "BRB_UI_DoxPlus_4"  
# full model
dds0_LRT <- DESeq2::DESeqDataSetFromMatrix(X[,def_list_select$sample],def_list_select,model_full)  #full model
converting counts to integer mode
some variables in design formula are characters, converting to factors
# reduced model
dds_LRT <- DESeq2::DESeq(dds0_LRT, test="LRT", reduced=model_reduced) #reduced model
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
-- note: fitType='parametric', but the dispersion trend was not well captured by the
   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.
final dispersion estimates
fitting model and testing
res_LRT <- DESeq2::results(dds_LRT)
res_LRT_fdr0p1 <- DESeq2::results(dds_LRT)  #クラスタリングに使用
res_LRT_fdr0p2 <- DESeq2::results(dds_LRT,alpha=0.2)

print(model.matrix(model_full, def_list_select)) #full model
   (Intercept) growthDiff24h growthDiff48h growthUI typeDoxPlus growthDiff24h:typeDoxPlus growthDiff48h:typeDoxPlus growthUI:typeDoxPlus
1            1             0             0        1           0                         0                         0                    0
2            1             0             0        1           0                         0                         0                    0
3            1             0             0        1           0                         0                         0                    0
4            1             0             0        1           0                         0                         0                    0
5            1             0             0        1           1                         0                         0                    1
6            1             0             0        1           1                         0                         0                    1
7            1             0             0        1           1                         0                         0                    1
8            1             0             0        1           1                         0                         0                    1
9            1             0             0        0           0                         0                         0                    0
10           1             0             0        0           0                         0                         0                    0
11           1             0             0        0           0                         0                         0                    0
12           1             0             0        0           0                         0                         0                    0
13           1             0             0        0           1                         0                         0                    0
14           1             0             0        0           1                         0                         0                    0
15           1             0             0        0           1                         0                         0                    0
16           1             0             0        0           1                         0                         0                    0
17           1             1             0        0           0                         0                         0                    0
18           1             1             0        0           0                         0                         0                    0
19           1             1             0        0           0                         0                         0                    0
20           1             1             0        0           0                         0                         0                    0
21           1             1             0        0           1                         1                         0                    0
22           1             1             0        0           1                         1                         0                    0
23           1             1             0        0           1                         1                         0                    0
24           1             1             0        0           1                         1                         0                    0
25           1             0             1        0           0                         0                         0                    0
26           1             0             1        0           0                         0                         0                    0
27           1             0             1        0           0                         0                         0                    0
28           1             0             1        0           0                         0                         0                    0
29           1             0             1        0           1                         0                         1                    0
30           1             0             1        0           1                         0                         1                    0
31           1             0             1        0           1                         0                         1                    0
32           1             0             1        0           1                         0                         1                    0
attr(,"assign")
[1] 0 1 1 1 2 3 3 3
attr(,"contrasts")
attr(,"contrasts")$growth
[1] "contr.treatment"

attr(,"contrasts")$type
[1] "contr.treatment"
print(model.matrix(model_reduced, def_list_select)) #reduced model
   (Intercept) growthDiff24h growthDiff48h growthUI
1            1             0             0        1
2            1             0             0        1
3            1             0             0        1
4            1             0             0        1
5            1             0             0        1
6            1             0             0        1
7            1             0             0        1
8            1             0             0        1
9            1             0             0        0
10           1             0             0        0
11           1             0             0        0
12           1             0             0        0
13           1             0             0        0
14           1             0             0        0
15           1             0             0        0
16           1             0             0        0
17           1             1             0        0
18           1             1             0        0
19           1             1             0        0
20           1             1             0        0
21           1             1             0        0
22           1             1             0        0
23           1             1             0        0
24           1             1             0        0
25           1             0             1        0
26           1             0             1        0
27           1             0             1        0
28           1             0             1        0
29           1             0             1        0
30           1             0             1        0
31           1             0             1        0
32           1             0             1        0
attr(,"assign")
[1] 0 1 1 1
attr(,"contrasts")
attr(,"contrasts")$growth
[1] "contr.treatment"
head(res_LRT[order(res_LRT$pvalue), ])
log2 fold change (MLE): growthUI.typeDoxPlus 
LRT p-value: '~ growth * type' vs '~ growth' 
DataFrame with 6 rows and 6 columns
                    baseMean log2FoldChange     lfcSE       stat      pvalue        padj
                   <numeric>      <numeric> <numeric>  <numeric>   <numeric>   <numeric>
ENSMUSG00000113980  902.4102       0.948565  0.361248 12465.0670 0.00000e+00 0.00000e+00
ENSMUSG00000108686   12.4124      -0.268131  1.551478   274.2590 3.85216e-58 2.07342e-54
ENSMUSG00000035783  990.6555       0.268978  0.107580   166.3410 6.37769e-35 2.28853e-31
ENSMUSG00000061723  362.9786       0.719623  0.666752   145.0557 2.33349e-30 6.27999e-27
ENSMUSG00000029304 1088.1244       0.158544  0.123811    83.7472 2.79716e-17 6.02228e-14
ENSMUSG00000028972   67.7825      -1.170201  0.298094    65.5466 1.97388e-13 3.54147e-10
DESeq2::summary(res_LRT_fdr0p1) #20191108修正

out of 21707 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 125, 0.58%
LFC < 0 (down)     : 101, 0.47%
outliers [1]       : 0, 0%
low counts [2]     : 10942, 50%
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
DESeq2::summary(res_LRT_fdr0p2) #20191108修正

out of 21707 with nonzero total read count
adjusted p-value < 0.2
LFC > 0 (up)       : 202, 0.93%
LFC < 0 (down)     : 179, 0.82%
outliers [1]       : 0, 0%
low counts [2]     : 10942, 50%
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
#DESeq2::summary.DESeqResults(res_LRT_fdr0p1)
#DESeq2::summary.DESeqResults(res_LRT_fdr0p2)

尤度比検定 結果

result LRT


folder_name_plot_path <- "./LRT/"
csvfilepath <- "BRB0432lane2noumi_H3mm18_Dox"

allres_LRT <- 0

#----- 全ての結果 -----#
allres_LRT <- as_tibble(res_LRT,rownames="ens_gene") %>% right_join(e2g,.)
Joining, by = "ens_gene"
file_path <- paste(folder_name_plot_path, "all__", csvfilepath, "_last200811.csv",sep="")
readr::write_csv(allres_LRT, file_path) # 全ての結果

nrow(allres_LRT)
[1] 21707
nrow(allres_LRT %>% filter(padj<0.1))
[1] 226
nrow(allres_LRT %>% filter(padj<0.2))
[1] 381
#----- fdr 0.1の結果 -----#
LRT_deglist_fdr0p1 <- as_tibble(res_LRT_fdr0p1,rownames="ens_gene") %>% right_join(e2g,.) %>% filter(padj<0.1) #クラスタリングに使用
Joining, by = "ens_gene"
file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_last200811.csv",sep="") # 今回取得したDEGのリスト
readr::write_csv(LRT_deglist_fdr0p1, file_path) # 全ての結果
nrow(LRT_deglist_fdr0p1)
[1] 226
#----- fdr 0.2の結果 -----#
LRT_deglist_fdr0p2 <- as_tibble(res_LRT_fdr0p2,rownames="ens_gene") %>% right_join(e2g,.) %>% filter(padj<0.2)
Joining, by = "ens_gene"
file_path <- paste(folder_name_plot_path, "DEG_fdr0p2__", csvfilepath, "_last200811.csv",sep="") # 今回取得したDEGのリスト
readr::write_csv(LRT_deglist_fdr0p2, file_path) # 全ての結果
nrow(LRT_deglist_fdr0p2)
[1] 381
LRT_deglist_fdr0p2 <- NA

# BRBでは right_join(e2g,.)、ATACでは right_join(ensemble,.)

Clustering LRT

#20191205修正と作成

# 全てのデータ

#-- 上で実行 --#
#dds_LRT <- DESeq2::DESeq(dds0_LRT, test="LRT", reduced=model_reduced) #reduced model
#res_LRT <- DESeq2::results(dds_LRT)
#res_LRT_fdr0p1 <- DESeq2::results(dds_LRT)
#res_LRT_fdr0p2 <- DESeq2::results(dds_LRT,alpha=0.2)
#--------------#

#vsd_LRT <- DESeq2::vst(dds_LRT)

#Xd_LRT <- SummarizedExperiment::assay(vsd_LRT)[which(res_LRT_fdr0p1$padj<0.1),] #degのリスト #Xd <- SummarizedExperiment::assay(vsd)[which(res$padj<0.1),]
#Xs_LRT <- Xd_LRT %>% t %>% scale %>% t

#-- クラスタリングに使用したXd,Xsを書き出し --#
# 20191212作成 (あとで確認できるように)
# LRT_deglist_fdr0p1

#file_path <- paste(folder_name_plot_path, "clustering_vsdLRTall__", csvfilepath, ".csv",sep="") 
#SummarizedExperiment::assay(vsd_LRT) %>% as_tibble(rownames="ens_gene") %>% readr::write_csv(.,file_path)

#file_path <- paste(folder_name_plot_path, "clustering_XdLRTfdr0p1__", csvfilepath, ".csv",sep="") 
#Xd_LRT %>% as_tibble(rownames="ens_gene") %>% readr::write_csv(.,file_path) 
# これと同じ: SummarizedExperiment::assay(vsd_LRT) %>% as_tibble(rownames="ens_gene")  %>% filter(ens_gene %in% LRT_deglist_fdr0p1$ens_gene)

#--------#

#file_path <- paste(folder_name_plot_path, "clustering_XsLRTall__", csvfilepath, ".csv",sep="")
#SummarizedExperiment::assay(vsd_LRT) %>% t %>% scale %>% t %>% as_tibble(rownames="ens_gene") %>% readr::write_csv(.,file_path)

#file_path <- paste(folder_name_plot_path, "clustering_XsLRTfdr0p1__", csvfilepath, ".csv",sep="") 
#Xs_LRT %>% as_tibble(rownames="ens_gene") %>% readr::write_csv(.,file_path)
# これと同じ: SummarizedExperiment::assay(vsd_LRT) %>% t %>% scale %>% t %>% as_tibble(rownames="ens_gene") %>% filter(ens_gene %in% LRT_deglist_fdr0p1$ens_gene)
#---------------------------------------------#


zscore_type_LRT <- zscore_type %>% filter(ens_gene %in% LRT_deglist_fdr0p1$ens_gene)
nrow(zscore_type_LRT)
[1] 226
Xs_LRT <- zscore_type_LRT %>% dplyr::select(-ens_gene,-ext_gene, -biotype,-chr) %>% as.matrix()
rownames(Xs_LRT) <- zscore_type_LRT$ens_gene



##--------- clustering -----------#
set.seed(3)


km_LRT <- kmeans(Xs_LRT,4,nstart = 25,algorithm = "Lloyd")
 10 回の反復を行いましたが収束しませんでした  10 回の反復を行いましたが収束しませんでした  10 回の反復を行いましたが収束しませんでした  10 回の反復を行いましたが収束しませんでした  10 回の反復を行いましたが収束しませんでした  10 回の反復を行いましたが収束しませんでした  10 回の反復を行いましたが収束しませんでした  10 回の反復を行いましたが収束しませんでした  10 回の反復を行いましたが収束しませんでした  10 回の反復を行いましたが収束しませんでした  10 回の反復を行いましたが収束しませんでした 
kmc_LRT <- km_LRT$centers %>% as_tibble(rownames="cluster") %>% gather(sample,val,-cluster) %>% inner_join(def)
Joining, by = "sample"
#kmc_LRT_group <- kmc_LRT %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxplus","Doxminus")))
#kmc_LRT_group <- kmc_LRT_group %>% mutate(time=case_when(growth=="UI" ~"UI",growth=="Diff0h"~"0h",growth=="Diff24h"~"24h",growth=="Diff48h"~"48h",TRUE~"error"))
#kmc_LRT_group <- kmc_LRT_group %>% mutate(time=factor(time, c("UI","0h","24h","48h")))

kmc_LRT_group <- kmc_LRT %>% mutate(time=factor(time, c("UI", "0h","24h","48h"))) %>% mutate(type=factor(type,c("DoxPlus","DoxMinus"))) %>% mutate(rep=factor(rep, c("1", "2", "3", "4")))


gggglabel <- paste("3T3 Dox +-, k-means","[1]",km_LRT$size[1],"[2]",km_LRT$size[2],"[3]",km_LRT$size[3],"[4]",km_LRT$size[4],sep=" ")


#------- size -------#

print(km_LRT$size)  #4つのクラスター [1] 47 55 94 33
[1] 42 54 93 37
rrres_LRT <- km_LRT$cluster %>% tibble(ens_gene=names(.),cluster=.) %>% right_join(e2g,.) %>% arrange(cluster)
Joining, by = "ens_gene"
file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__kmeans_cluster.csv",sep="") 
readr::write_csv(rrres_LRT,file_path)

##------- PCA -------#

pcacluster_save <- prcomp(Xs_LRT)$x %>% as_tibble %>% select(PC1,PC2) %>% mutate(cluster=km_LRT$cluster) %>% ggplot(aes(PC1,PC2,colour=factor(cluster)))+geom_point(size=1.5,alpha=0.6)+coord_fixed()+theme_linedraw()+ggsci::scale_color_d3("category20")

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__kmeans__pcacluster_PC1PC2.pdf",sep="") 
ggsave(plot=pcacluster_save,file=file_path, width = 10, height = 6, dpi = 120)
print(pcacluster_save)

pcacluster_save <- prcomp(Xs_LRT)$x %>% as_tibble %>% select(PC1,PC3) %>% mutate(cluster=km_LRT$cluster) %>% ggplot(aes(PC1,PC3,colour=factor(cluster)))+geom_point(size=1.5,alpha=0.6)+coord_fixed()+theme_linedraw()+ggsci::scale_color_d3("category20")

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__kmeans__pcacluster_PC1PC3.pdf",sep="") 
ggsave(plot=pcacluster_save,file=file_path, width = 10, height = 6, dpi = 120)

print(pcacluster_save)


#================================================#
# mouseCTX 0438を参考に。

#------------------#
f_cluster <- function(x) x %>% group_by(group, type, time, cluster) %>% summarise(avg=mean(val),se=sd(val)/sqrt(length(val))) %>% ungroup()
print(kmc_LRT_group %>% group_by(group, type, time) %>% summarise())
`summarise()` regrouping output by 'group', 'type' (override with `.groups` argument)
f_clusterp <- function(x) x %>% group_by(group, type, time, cluster) %>% summarise(avg=mean(val),se=sd(val)/sqrt(length(val))) %>% ungroup()
print(kmc_LRT_group %>% group_by(group, type, time) %>% summarise()) #作図用
`summarise()` regrouping output by 'group', 'type' (override with `.groups` argument)
#-------#

cluster_save <- kmc_LRT_group %>%
ggplot(aes(time,val,group=type,colour=type))+geom_line(aes(x=time,y=avg,colour=type),data=f_cluster)+geom_point()+facet_wrap(~cluster,2)+ggsci::scale_color_npg()+theme_minimal()+ theme(strip.text = element_text(size = 12),axis.text.x = element_text(angle = 45, hjust = 1))  + ggtitle(gggglabel)

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__cluster_type.pdf",sep="") 
ggsave(plot=cluster_save,file=file_path, width = 6, height = 6, dpi = 120)

print(cluster_save)

#-------#
cluster_save <- kmc_LRT_group %>%
ggplot(aes(time,val,group=type,colour=type))+geom_point()+facet_wrap(~cluster,2)+ggsci::scale_color_npg()+theme_minimal()+ theme(strip.text = element_text(size = 12),axis.text.x = element_text(angle = 45, hjust = 1))  + ggtitle(gggglabel)


file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__cluster_type_nonline.pdf",sep="") 
ggsave(plot=cluster_save,file=file_path, width = 6, height = 6, dpi = 120)

#------------------#
#================================================#



##--------- リストを保存 -------------#
## degの情報も付け加える (20191204 ver)。

#---- LRTの情報 baseMean等を取り出す ----#
#LRT_deglist_fdr0p1 <- as_tibble(res_LRT_fdr0p1,rownames="ens_gene") %>% right_join(e2g,.) %>% filter(padj<0.1) #クラスタリングに使用
#LRT_deglist_fdr <- as_tibble(res_LRT,rownames="ens_gene") %>% filter(padj<0.1)

LRT_deglist_fdr <- LRT_deglist_fdr0p1 %>% select(-(ext_gene),-(biotype),-(chr))
print(fdr)
[1] 0.1
nrow(LRT_deglist_fdr0p1)
[1] 226
#------------------------#

arrange_rrres_LRT <- rrres_LRT %>% arrange(ens_gene)
cluster_resLRT_fdr <- full_join(arrange_rrres_LRT, LRT_deglist_fdr, by="ens_gene") %>% arrange(cluster)
nrow(cluster_resLRT_fdr)
[1] 226
file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__cluster_result.csv",sep="")
readr::write_csv(cluster_resLRT_fdr,file_path)

#-- 確認 --#
arrange_rrres_LRT %>% filter(ext_gene %in% c("Myh3","Ckm","Acta1","Tnnt2","Actb","Csrp3","Tpm2","Nsdhl"))
##------------------------------------#
#20191205修正、作成

#===============================#
# mouseCTX 0438を参考に。

# strip.text.x = element_text(size=24,face="italic")
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合

cluster_save <- kmc_LRT_group %>% ggplot(aes(time,val,group=type,colour=type))+geom_line(aes(x=time,y=avg,colour=type),data=f_clusterp)+geom_point(size=2)+facet_wrap(~cluster,1) +theme_bw() + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24), legend.position = "top",  plot.title=element_text(size=10)) +ggsci::scale_color_npg() + ggtitle(gggglabel)

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__cluster_type__final.pdf",sep="") 
ggsave(plot=cluster_save,file=file_path, width = 12, height = 6, dpi = 120)
print(cluster_save)
#------------------#

cluster_save <- kmc_LRT_group %>% ggplot(aes(time,val,group=type,colour=type)) + geom_abline(intercept=0,slope=0,linetype="dashed",colour="gray") + geom_line(aes(x=time,y=avg,colour=type),data=f_clusterp) + geom_point(size=2)+facet_wrap(~cluster,1) +theme_bw() + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24), legend.position = "top",  plot.title=element_text(size=10)) +ggsci::scale_color_npg() + ggtitle(gggglabel)


file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__cluster_type__final_dash.pdf",sep="") 
ggsave(plot=cluster_save,file=file_path, width = 12, height = 6, dpi = 120)

print(cluster_save)

#------------------#

TPM plot

(CTX day5 DEG Up Down ver)

calculate TPM

# 2019 12月修正
# カウント0も表示するように変更 (20190722) BRB-seq_0432lane2_QC_tmpl_v6r190722_noumi_H3mm18_Dox_linear_0722 より

tpm <- umi %>% group_by(sample) %>% mutate(sample_total=sum(count),TPM=count/sum(count)*1e6) %>% ungroup
tpm_zero <- tpm %>% select(sample,ens_gene,TPM) %>% spread(sample,TPM,fill=0) %>% gather(sample, TPM, -ens_gene) #カウント0のサンプルは0を入れる 20190722追加して修正

tpm_def <- def %>% select(-count) %>% dplyr::inner_join(tpm_zero, by = "sample") #tpmをtpm_zeroに修正、20190722修正

#f <- function(x) x %>% group_by(group,type,growth,ext_gene) %>% summarise(avg=mean(TPM),se=sd(TPM)/sqrt(length(TPM))) %>% ungroup() #重要(平均を求める、geom_smooth(se=FALSE)を使わないver)

#-- 確認 --#
umi %>% group_by(sample) %>% summarise(sum(count)) # 確認
`summarise()` ungrouping output (override with `.groups` argument)
tpm_zero %>% group_by(sample) %>% summarise(sum(TPM)) # 確認
`summarise()` ungrouping output (override with `.groups` argument)
#---------#
matome <- tpm_def %>% inner_join(e2g, by = "ens_gene") #191203

readr::write_csv(matome,paste(csvfilepath, "__TPM__final191205_last200811.csv",sep="")) #191203 追加

#readr::write_csv(matome,"BRB0438noumi_190515-H3mm18KO_CTX_S2-Day0_S3_l2fc_fdr0p2ver__TPM__final191204.csv") #191203 追加

TPM fig

使用するものをプロット (191203修正)

“Acta1”,“Myh3”,“Ckm”,“Tnnt2”,“Actb”,“Csrp3”


#======== Change every data ここで順番を変更 ========#

#matome_select <- matome %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxplus","Doxminus")))
#matome_select <- matome_select %>% mutate(time=case_when(growth=="UI" ~"UI",growth=="Diff0h"~"0h",growth=="Diff24h"~"24h",growth=="Diff48h"~"48h",TRUE~"error"))
#matome_select <- matome_select %>% mutate(time=factor(time, c("UI","0h","24h","48h")))

matome_select <- matome %>% mutate(time=factor(time, c("UI", "0h","24h","48h"))) %>% mutate(type=factor(type,c("DoxPlus","DoxMinus"))) %>% mutate(rep=factor(rep, c("1", "2", "3", "4")))


#matome_select <- matome %>% filter(intact_CTX=="CTX"|intact_CTX=="SKM") %>%  mutate(WT_KO=factor(WT_KO, c("H3mm18KO","WT"))) %>% mutate(Day=factor(Day, c("Day0","Day5","Day14"))) %>% mutate(intact_CTX=factor(intact_CTX, c("CTX","SKM")))

#-------#

tbl <- matome_select %>% filter(ext_gene %in% c("Myh3","Ckm","Acta1","Tnnt2","Actb","Csrp3"))

#tbl2 <- matome_select %>% filter(ext_gene %in% c("Acta1","Tpm2"))

#====================================================#

#f <- function(x) x %>% group_by(WT_KO,Day,intact_CTX,ext_gene) %>% summarise(avg=mean(TPM),se=sd(TPM)/sqrt(length(TPM))) %>% ungroup() #重要 (CTX用に変更)

f <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(TPM),se=sd(TPM)/sqrt(length(TPM))) %>% ungroup()
#----#
tbl %>% group_by(group, type, time) %>% summarize()
`summarise()` regrouping output by 'group', 'type' (override with `.groups` argument)
#----#

#face="italic"
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合

### point ###
gggggpp <-  ggplot(tbl,aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",2)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=20,face="italic"), legend.position = "top",  plot.title=element_text(size=16))+ggsci::scale_color_npg()


file_path <- paste("TPM__", csvfilepath, "__with_point__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 10)

#ggsave(file=file_path, plot = gggggpp)
#ggsave(file="TPM__BRB0438noumi_H3mm18KO_CTX_S2-Day0_S3__with_point__final191204.pdf", plot = gggggpp)
print(gggggpp)

gggggpp <-  ggplot(tbl,aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",2)+geom_smooth(se=FALSE)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=20,face="italic"), legend.position = "top",  plot.title=element_text(size=16))+ggsci::scale_color_npg()

file_path <- paste("TPM__", csvfilepath, "__with_point__final191205_last200811_smooth.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 10)


#ggsave(file=file_path, plot = gggggpp)
#ggsave(file="TPM__BRB0438noumi_H3mm18KO_CTX_S2-Day0_S3__with_point__final191204_smooth.pdf", plot = gggggpp)
#print(gggggpp)

“Acta1”,“Myh3”,“Nsdhl”

#20191204 作成

tbl2 <- matome_select %>% filter(ext_gene %in% c("Acta1","Myh3","Nsdhl"))
#%>% mutate(ext_gene=factor(ext_gene, c("Acta1","Nsdhl","Myh3")))

#===============================#
## SKMもCTXと同じ色で表示

### point ###
gggggpp <-  ggplot(tbl2,aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y")+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24,face="italic"), legend.position = "top",  plot.title=element_text(size=20)) +ggsci::scale_color_npg()  + ggtitle("3T3 Dox +-")

file_path <- paste("TPM__", csvfilepath, "__with_point__Acta1_Myh3_Nsdhl__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 6) #2つ図なら width = 8
print(gggggpp)


#ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 8, height = 6) #2つ図なら width = 8
#ggsave(file="TPM__BRB0438noumi_H3mm18KO_CTX_S2-Day0_S3__with_point__Acta1_Tpm2__final191204.pdf", plot = gggggpp, dpi = 100, width = 8, height = 6)

“Slc38a2”,“Inhba”,“Acta1”,“Myog”,“Myh9”,“Rpl13”

#20191204 作成

rrres_LRT %>% filter(ext_gene %in% c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))

nbl3 <- matome_select %>% filter(ext_gene %in% c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))  %>% mutate(ext_gene=factor(ext_gene,c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))) 


#===============================#
## SKMもCTXと同じ色で表示

### point ###
gggggpp <-  ggplot(nbl3,aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y")+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24,face="italic"), legend.position = "top",  plot.title=element_text(size=20)) +ggsci::scale_color_npg()  + ggtitle("3T3 Dox +-")

file_path <- paste("TPM__", csvfilepath, "__with_point__Cluste_Rep__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 6) #2つ図なら width = 8
print(gggggpp)


#ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 8, height = 6) #2つ図なら width = 8
#ggsave(file="TPM__BRB0438noumi_H3mm18KO_CTX_S2-Day0_S3__with_point__Acta1_Tpm2__final191204.pdf", plot = gggggpp, dpi = 100, width = 8, height = 6)

ALL DEG, LRT FDR < 0.1

# DEGをclusterに分けて表示 (191206作成)

#-- 設定 -----#
plot_title1 <- "TPM"
plot_title2 <- "DEG (LRT): BRB-seq 3T3 Dox +-"
#folder_name <- "LRT"

#-- ファイル名 の設定 ---#
plot_title0 <- paste(plot_title1, plot_title2, sep=", ")
#folder_name_plot0 <- paste(".",folder_name, plot_title1,"",sep="/")
#folder_name_plot_path <- paste(folder_name_plot0,paste(folder_name,plot_title1,"",sep="_"),sep="")

#------------------------#

#===============================#

#----- 上で出てきたDEGのリストの取り出し -----#
deg_cluster_list <- rrres_LRT  %>% arrange(cluster)
deg_cluster_size <- deg_cluster_list %>% arrange(ens_gene) %>% group_by(cluster) %>% summarize(size=n())
`summarise()` ungrouping output (override with `.groups` argument)
#----- DEGのみ取り出す ------#
deg_matome <- matome %>% filter(ens_gene %in% deg_cluster_list$ens_gene) %>% right_join(deg_cluster_list %>% select(ens_gene,cluster), by = "ens_gene")
#deg_matome <- deg_matome %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxplus","Doxminus")))
#deg_matome <- deg_matome %>% mutate(time=case_when(growth=="UI" ~"UI",growth=="Diff0h"~"0h",growth=="Diff24h"~"24h",growth=="Diff48h"~"48h",TRUE~"error"))
#deg_matome <- deg_matome %>% mutate(time=factor(time, c("UI","0h","24h","48h")))


deg_matome <- deg_matome %>% mutate(time=factor(time, c("UI", "0h","24h","48h"))) %>% mutate(type=factor(type,c("DoxPlus","DoxMinus"))) %>% mutate(rep=factor(rep, c("1", "2", "3", "4")))


#------------------------------#

f <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(TPM),se=sd(TPM)/sqrt(length(TPM))) %>% ungroup()

#===============================#
### point ###
## クラスターごと ##
for (i in 1:4) {
  cluster_now <- paste("cluster",deg_cluster_size$cluster[i],sep="")
  plot_title0_cluster <- paste(plot_title0, cluster_now, deg_cluster_size$size[i], sep=", ") #クラスターの個数も表示(20191025)
  
  print(plot_title0_cluster)
  
  ppplotsize <- (as.integer(deg_cluster_size$size[i]/10) + 1) * 2 + 1
  
  cluster_plotsave <- deg_matome %>% filter(cluster==deg_cluster_size$cluster[i]) %>%  ggplot(aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(angle = 45, hjust = 1), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle(plot_title0_cluster)
  
  
  file_path <- paste(folder_name_plot_path, "TPM__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_",cluster_now,".pdf",sep="") 
  ggsave(plot=cluster_plotsave,file=file_path, width = 20, height = ppplotsize, dpi = 120, limitsize = FALSE)

}
[1] "TPM, DEG (LRT): BRB-seq 3T3 Dox +-, cluster1, 42"
[1] "TPM, DEG (LRT): BRB-seq 3T3 Dox +-, cluster2, 54"
[1] "TPM, DEG (LRT): BRB-seq 3T3 Dox +-, cluster3, 93"
[1] "TPM, DEG (LRT): BRB-seq 3T3 Dox +-, cluster4, 37"
#===============================#
### point ###
## まとめて ##

#-- 斜めのラベル --#

ppplotsize <- (as.integer(nrow(deg_cluster_list)/10) + 1) * 2 + 1

cluster_plotsave <- deg_matome %>% ggplot(aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(angle = 45, hjust = 1), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle("3T3 Dox +-")


file_path <- paste(folder_name_plot_path, "TPM__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_all_1.pdf",sep="") 
ggsave(plot=cluster_plotsave,file=file_path, width = 20, height = ppplotsize, dpi = 360, limitsize = FALSE)

#-- 横向きのラベル --#

ppplotsize <- ppplotsize * 1.25

cluster_plotsave <- deg_matome %>% ggplot(aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle("3T3 Dox +-")

file_path <- paste(folder_name_plot_path, "TPM__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_all_2.pdf",sep="") 
ggsave(plot=cluster_plotsave,file=file_path, width = 25, height = ppplotsize, dpi = 360, limitsize = FALSE)


#--memo--#
#face="italic"
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合

TPM作図終了

normalizedcount plot

set normCount table

# 2019 12月作成

nrow(normalizedcount)
[1] 21707
nrow(normalizedcount %>% inner_join(e2g, by = "ens_gene"))
[1] 21707
#---------#
norm_plotlist_all <- normalizedcount %>% gather("sample", "normalized",-(ens_gene)) %>% inner_join(def, by = "sample") %>% inner_join(e2g, by = "ens_gene") 
#norm_plotlist_all <- norm_plotlist_all %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxplus","Doxminus")))
#norm_plotlist_all <- norm_plotlist_all %>% mutate(time=case_when(growth=="UI" ~"UI",growth=="Diff0h"~"0h",growth=="Diff24h"~"24h",growth=="Diff48h"~"48h",TRUE~"error"))
#norm_plotlist_all <- norm_plotlist_all %>% mutate(time=factor(time, c("UI","0h","24h","48h")))

norm_plotlist_all <- norm_plotlist_all %>% mutate(time=factor(time, c("UI", "0h","24h","48h"))) %>% mutate(type=factor(type,c("DoxPlus","DoxMinus"))) %>% mutate(rep=factor(rep, c("1", "2", "3", "4")))

readr::write_csv(norm_plotlist_all,paste(csvfilepath, "__normCount__final191205_last200811.csv",sep="")) #191206 追加

normcount fig

ALL DEG, LRT FDR < 0.1 (Total 229)

# 20191205修正、20191024
# DEGをclusterに分けて表示 (191206作成)

#-- 設定 -----#
plot_title1 <- "normCount"
plot_title2 <- "DEG (LRT): BRB-seq 3T3 Dox +-"
#folder_name <- "LRT"

#-- ファイル名 の設定 ---#
plot_title0 <- paste(plot_title1, plot_title2, sep=", ")
#folder_name_plot0 <- paste(".",folder_name, plot_title1,"",sep="/")
#folder_name_plot_path <- paste(folder_name_plot0,paste(folder_name,plot_title1,"",sep="_"),sep="")

#------------------------#

#===============================#
## norm count 後のデータ

#----- 上で出てきたDEGのリストの取り出し -----#
#----- DEGのみ取り出す ------#
deg_normcount <- norm_plotlist_all %>% filter(ens_gene %in% deg_cluster_list$ens_gene) %>% right_join(deg_cluster_list %>% select(ens_gene,cluster), by = "ens_gene")
#deg_normcount <- deg_normcount %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxplus","Doxminus")))
#deg_normcount <- deg_normcount %>% mutate(time=case_when(growth=="UI" ~"UI",growth=="Diff0h"~"0h",growth=="Diff24h"~"24h",growth=="Diff48h"~"48h",TRUE~"error"))
#deg_normcount <- deg_normcount %>% mutate(time=factor(time, c("UI","0h","24h","48h")))

#plotlist_cluster <- deg_normcount
#------------------------------#


f_gene_norm <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(normalized),se=sd(normalized)/sqrt(length(normalized))) %>% ungroup()
#f <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(TPM),se=sd(TPM)/sqrt(length(TPM))) %>% ungroup()

#===============================#
### point ###
## クラスターごと ##
for (i in 1:4) {
  cluster_now <- paste("cluster",deg_cluster_size$cluster[i],sep="")
  plot_title0_cluster <- paste(plot_title0, cluster_now, deg_cluster_size$size[i], sep=", ") #クラスターの個数も表示(20191025)
  
  print(plot_title0_cluster)
  
  ppplotsize <- (as.integer(deg_cluster_size$size[i]/10) + 1) * 2 + 1
  
  cluster_plotsave <- deg_normcount %>% filter(cluster==deg_cluster_size$cluster[i]) %>%  ggplot(aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(angle = 45, hjust = 1), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle(plot_title0_cluster) + ylab("normalized count") 
  
  
  file_path <- paste(folder_name_plot_path, "normCount__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_",cluster_now,".pdf",sep="") 
  ggsave(plot=cluster_plotsave,file=file_path, width = 20, height = ppplotsize, dpi = 120, limitsize = FALSE)

}
[1] "normCount, DEG (LRT): BRB-seq 3T3 Dox +-, cluster1, 42"
[1] "normCount, DEG (LRT): BRB-seq 3T3 Dox +-, cluster2, 54"
[1] "normCount, DEG (LRT): BRB-seq 3T3 Dox +-, cluster3, 93"
[1] "normCount, DEG (LRT): BRB-seq 3T3 Dox +-, cluster4, 37"
#===============================#
### point ###
## まとめて ##

#-- 斜めのラベル --#

ppplotsize <- (as.integer(nrow(deg_cluster_list)/10) + 1) * 2 + 1

cluster_plotsave <- deg_normcount %>% ggplot(aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(angle = 45, hjust = 1), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle("3T3 Dox +-") + ylab("normalized count") 


file_path <- paste(folder_name_plot_path, "normCount__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_all_1.pdf",sep="") 
ggsave(plot=cluster_plotsave,file=file_path, width = 20, height = ppplotsize, dpi = 360, limitsize = FALSE)

#-- 横向きのラベル --#

ppplotsize <- ppplotsize * 1.25

cluster_plotsave <- deg_normcount %>% ggplot(aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle("3T3 Dox +-") + ylab("normalized count") 

file_path <- paste(folder_name_plot_path, "normCount__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_all_2.pdf",sep="") 
ggsave(plot=cluster_plotsave,file=file_path, width = 25, height = ppplotsize, dpi = 360, limitsize = FALSE)


#--memo--#
#face="italic"
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合

使用するものをプロット

“Acta1”,“Myh3”,“Ckm”,“Tnnt2”,“Actb”,“Csrp3”


#======== Change every data ここで順番を変更 ========#

#-------#

nbl <- norm_plotlist_all %>% filter(ext_gene %in% c("Myh3","Ckm","Acta1","Tnnt2","Actb","Csrp3"))

#====================================================#

#f_gene_norm <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(normalized),se=sd(normalized)/sqrt(length(normalized))) %>% ungroup()

#----#
nbl %>% group_by(group, type, time) %>% summarize()
`summarise()` regrouping output by 'group', 'type' (override with `.groups` argument)
#----#

#face="italic"
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合

### point ###
gggggpp <-  ggplot(nbl,aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",2)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=20,face="italic"), legend.position = "top",  plot.title=element_text(size=16))+ggsci::scale_color_npg()  + ylab("normalized count")

file_path <- paste("normCount__", csvfilepath, "__with_point__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 10)

print(gggggpp)


gggggpp <-  ggplot(nbl,aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",2)+geom_smooth(se=FALSE)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=20,face="italic"), legend.position = "top",  plot.title=element_text(size=16))+ggsci::scale_color_npg()  + ylab("normalized count")

file_path <- paste("normCount__", csvfilepath, "__with_point__final191205_last200811_smooth.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 10)

NA
NA

“Acta1”,“Myh3”,“Nsdhl”

#20191204 作成

nbl2 <- norm_plotlist_all %>% filter(ext_gene %in% c("Acta1","Myh3","Nsdhl"))

#%>% mutate(ext_gene=factor(ext_gene, c("Acta1","Nsdhl","Myh3")))

#===============================#

### point ###
gggggpp <-  ggplot(nbl2,aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y")+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24,face="italic"), legend.position = "top",  plot.title=element_text(size=20)) +ggsci::scale_color_npg()  + ggtitle("3T3 Dox +-")  + ylab("normalized count")

file_path <- paste("normCount__", csvfilepath, "__with_point__Acta1_Myh3_Nsdhl__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 6) #2つ図なら width = 8
print(gggggpp)

NA
NA
#20191204 作成

rrres_LRT %>% filter(ext_gene %in% c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))

nbl3 <- norm_plotlist_all %>% filter(ext_gene %in% c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))  %>% mutate(ext_gene=factor(ext_gene,c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))) 

#%>% mutate(ext_gene=factor(ext_gene, c("Acta1","Nsdhl","Myh3")))

#===============================#

### point ###
gggggpp <-  ggplot(nbl3,aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",nrow=1)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24,face="italic"), legend.position = "top",  plot.title=element_text(size=20)) +ggsci::scale_color_npg()  + ggtitle("3T3 Dox +-")  + ylab("normalized count")

file_path <- paste("normCount__", csvfilepath, "__with_point__Cluste_Rep__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 16, height = 5) #2つ図なら width = 8
print(gggggpp)


nbl3

ここまで実行

GO解析

クラスタリング の結果をGO

20191206修正

#20191206修正

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__kmeans_cluster.csv",sep="") 
print(file_path)
[1] "./LRT/DEG_fdr0p1__BRB0432lane2noumi_H3mm18_Dox_kmeans4__kmeans_cluster.csv"
file_degcluster <- file_path

table_degcluster <- readr::read_csv(file_degcluster) %>% arrange(ens_gene) %>% dplyr::select(ens_gene,ext_gene,cluster)
Parsed with column specification:
cols(
  ens_gene = col_character(),
  ext_gene = col_character(),
  biotype = col_character(),
  chr = col_character(),
  cluster = col_double()
)
table_degcluster %>% group_by(cluster) %>% summarise(size=n())
`summarise()` ungrouping output (override with `.groups` argument)
##### FDR setting ######
gofdr <- 0.1

cluster_num <- 4
# 20191206修正

library(clusterProfiler)
library(org.Mm.eg.db)

folder_path <- "./LRT/clusterProfile/"

#-------------#
file_path <- paste(folder_path, "DEG_fdr0p1__", csvfilepath, "_kmeans_BPfdr0p1_generatio",sep="")
filename_csv <- file_path

file_path <- paste(folder_path, "DEG_fdr0p1__", csvfilepath, "_kmeans_BPfdr0p1_generatio_cluster",sep="")
filename_list <- file_path

print(filename_list)
[1] "./LRT/clusterProfile/DEG_fdr0p1__BRB0432lane2noumi_H3mm18_Dox_kmeans_BPfdr0p1_generatio_cluster"
print(filename_csv)
[1] "./LRT/clusterProfile/DEG_fdr0p1__BRB0432lane2noumi_H3mm18_Dox_kmeans_BPfdr0p1_generatio"
#例 filename_list <- "./LRT/clusterProfile/H3mm18KO_mouseCTX_BRB0438_day5_2gunfdr0p2_kmeans_BPfdr0p1_generatio_cluster"
#例 filename_csv <- "./LRT/clusterProfile/H3mm18KO_mouseCTX_BRB0438_day5_2gunfdr0p2_kemans_BPfdr0p1_generatio"
#-------------#

cluster_list <- as.list(NA) #初期化

for (i in 1:cluster_num) {
   pre_list <- as.list(NA)
   pre_list <- table_degcluster %>% filter(cluster==as.double(i)) %>% dplyr::select(ens_gene) %>% as.list()
   names(pre_list) <- paste("ENSEMBL",as.character(i),sep="_")
 
   if (i == 1) { 
     cluster_list <- pre_list
   } 
   else cluster_list <- c(cluster_list, pre_list) 
}


for (i in 1:cluster_num) {
   print(paste(i, cluster_list[[i]] %>% tibble::enframe(name = NULL) %>% nrow(), sep=", "))
  
   pre_ego_BP <- enrichGO(gene = cluster_list[[i]],
                 OrgDb = "org.Mm.eg.db",
                 keyType = 'ENSEMBL',
                 ont = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff  = gofdr, qvalueCutoff  = 1.0) #20191211修正  pvalueCutoff  = fdr
   
   ## pvalue < qvalue < p.adjust ##
   # qvalueCutoff  = 0.3  qvalueCutoff  = 0.2 , qvalueCutoff  = 1.0

  
   if (i == 1) { 
     table_ego_BP <- data.frame(pre_ego_BP) %>% mutate(cluster=paste("cluster",as.character(i),sep=""))  # リスト型からデータフレームへ変換
   } 
   else table_ego_BP <- table_ego_BP %>% bind_rows(data.frame(pre_ego_BP) %>% mutate(cluster=paste("cluster",as.character(i),sep="")))
   
   #---- plot ---#
   BPplot <- dotplot(pre_ego_BP, showCategory=30, orderBy = "Count") #clusterProfile の機能で図を描く(191106修正) wrong orderBy parameter; set to default `orderBy = "x"`
   print(BPplot)
   ggsave(BPplot,file=paste(filename_list,as.character(i),".png",sep=""), width = 8, height = 12, dpi = 120)
   
   BPplot <- dotplot(pre_ego_BP, showCategory=10, orderBy = "Count") #clusterProfile の機能で図を描く(191106修正) wrong orderBy parameter; set to default `orderBy = "x"`
   print(BPplot)
   ggsave(BPplot,file=paste(filename_list,as.character(i),"_Category10.png",sep=""), width = 8, height = 4, dpi = 120)
   
   BPplot <- dotplot(pre_ego_BP, showCategory=5, orderBy = "Count") #clusterProfile の機能で図を描く(191106修正) wrong orderBy parameter; set to default `orderBy = "x"`
   print(BPplot)
   ggsave(BPplot,file=paste(filename_list,as.character(i),"_Category5.png",sep=""), width = 8, height = 3, dpi = 120)
}
[1] "1, 42"
[1] "2, 54"
[1] "3, 93"
[1] "4, 37"

print(table_ego_BP %>% group_by(cluster) %>% summarize())
`summarise()` ungrouping output (override with `.groups` argument)
#------#
# データはtable_ego_BPに。
# 20191206修正
#------------------------------------------------------#
# テーブルを保存
# table_ego_BP_3t3_LRT2 <- table_ego_BP

table_ego_BP1 <- table_ego_BP %>% mutate(cluster=factor(cluster,c("cluster1","cluster2","cluster3","cluster4"))) %>% arrange(cluster,desc(Count)) #191106

readr::write_csv(table_ego_BP1,paste(filename_csv,".csv",sep=""))

# 先のテーブルのgeneIDをgene nameに置換する。(20191025)

tablego <- table_ego_BP1 %>% mutate(gene_name=geneID) %>% dplyr::select(-(qvalue))

for (i in 1:nrow(table_degcluster)) {
  tablego <- tablego %>% mutate(gene_name=gsub(gene_name, pattern=table_degcluster$ens_gene[i], replacement=table_degcluster$ext_gene[i], ignore.case = TRUE))
}

print(tablego)

readr::write_csv(tablego,paste(filename_csv,"_genename.csv",sep=""))

#------------------------------------------------------#

#GOのtermの数
print(tablego %>% group_by(cluster) %>% summarize(cluster_3t3Dox_num = dplyr::n()))
`summarise()` ungrouping output (override with `.groups` argument)
## 変更 ##
table_ego_BP_2gunfdr0p2_cluster <- tablego

#--- メモ ----#
#tableggg <- table_ego_clustercluster
#colm <- tableggg$geneID
#for (i in 1:88) {
#  colm <- sub(rrres_cluster3$ens_gene[i], rrres_cluster3$ext_gene[i], colm)
#}
#print(colm)
# 20191206, 191211修正
# Benjamini correction を p-adjust として使用する

file_path <- paste(folder_path, "DEG_fdr0p1__", csvfilepath, "_kmeans_BPfdr0p1.pdf",sep="")
file_BP_plot <- file_path

file_path <- paste(folder_path, "DEG_fdr0p1__", csvfilepath, "_kmeans__BPfdr0p1_muscleonly.pdf",sep="")
file_BP_plot_muscle <- file_path

print(file_BP_plot)
[1] "./LRT/clusterProfile/DEG_fdr0p1__BRB0432lane2noumi_H3mm18_Dox_kmeans_BPfdr0p1.pdf"
print(file_BP_plot_muscle)
[1] "./LRT/clusterProfile/DEG_fdr0p1__BRB0432lane2noumi_H3mm18_Dox_kmeans__BPfdr0p1_muscleonly.pdf"
# 例 file_BP_plot <- "./2gun/clusterProfile/BPfdr0p1__H3mm18KO_mouseCTX_BRB0438_day5_2gunfdr0p2_kmeans.pdf"
# 例 file_BP_plot_muscle <- "./2gun/clusterProfile/BPfdr0p1__H3mm18KO_mouseCTX_BRB0438_day5_2gunfdr0p2_kmeans_muscleonly.pdf"

#--------------------#

BP_matome <- tablego

rowlength <- BP_matome %>% group_by(Description) %>% summarize() %>% nrow()
`summarise()` ungrouping output (override with `.groups` argument)
BP_plot <- BP_matome %>% filter(p.adjust<gofdr) %>% ggplot(aes(x=cluster, y=reorder(Description,Count), size=Count, fill=p.adjust)) + geom_point(shape = 21) + theme(strip.text = element_text(size = 10)) + theme_minimal() + theme(strip.text = element_text(size = 12),axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_gradient(low = "red" , high = "blue")+ xlab("3T3 Dox +-") + ylab("GO Description (BP)") + labs(fill=paste("p.adjust (BH) < ",as.character(gofdr),sep=""))
print(BP_plot)
ggsave(plot=BP_plot,file=file_BP_plot, width = 10, height = (5+rowlength/6), dpi = 120,limitsize = FALSE)


#---muscle関連のみ
BP_matome_muscle <- BP_matome %>% filter(grepl("muscle", Description)|grepl("myo", Description))
rowlength <- BP_matome_muscle %>% group_by(Description) %>% summarize() %>% nrow()
`summarise()` ungrouping output (override with `.groups` argument)
BP_plot_muscle <- BP_matome_muscle %>% filter(p.adjust<gofdr) %>% ggplot(aes(x=cluster, y=reorder(Description,Count), size=Count, fill=p.adjust)) + geom_point(shape = 21) + theme(strip.text = element_text(size = 10)) + theme_minimal() + theme(strip.text = element_text(size = 12),axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_gradient(low = "red" , high = "blue") + xlab("3T3 Dox +- (muscle)") + ylab("GO Description (BP)") + labs(fill=paste("p.adjust (BH) < ",as.character(gofdr),sep=""))
print(BP_plot_muscle)
ggsave(BP_plot_muscle,file=file_BP_plot_muscle, width = 8, height = (5+rowlength/6), dpi = 120,limitsize = FALSE)

NA
NA
# 20191206修正
# 20191003追加, 191120変更

#----- countのみのtable --------------#
aaa_tablego <- tablego %>% dplyr::select(ID,Description,cluster,Count) %>% spread(key=cluster,value = Count,fill=0) %>% mutate(Total= cluster2 + cluster3 +cluster4)  %>% arrange(desc(Total))

#aaa_tablego <- tablego %>% dplyr::select(ID,Description,cluster,Count) %>% spread(key=cluster,value = Count,fill=0) %>% mutate(Total= cluster1 +cluster2 + cluster3 +cluster4)  %>% arrange(desc(Total))

file_path <- paste(folder_path, "DEG_fdr0p1__", csvfilepath, "_kmeans__BPfdr0p1__count_table.csv",sep="")
print(file_path)
[1] "./LRT/clusterProfile/DEG_fdr0p1__BRB0432lane2noumi_H3mm18_Dox_kmeans__BPfdr0p1__count_table.csv"
readr::write_csv(aaa_tablego,file_path)

#例 "./2gun/clusterProfile/BPfdr0p1__H3mmKO_mouseCTX_BRB0438_2gunfdr0p2_kemans__count_table.csv"
#-------------------------------------#
# x=pvalue, y=p.adjust
plottt <- table_ego_BP %>% ggplot(aes(x=pvalue, y=p.adjust, size=Count)) + geom_point()+geom_abline(intercept=0,slope=1.0,linetype="dashed",colour="blue") + xlim(0,NA) + ylim(0,NA) + ggtitle(label=paste("p.adjust (BH) < ",as.character(gofdr),sep=""))
print(plottt)


# x=pvalue, y=qvalue
plottt <- table_ego_BP %>% ggplot(aes(x=pvalue, y=qvalue, size=Count)) + geom_point()+geom_abline(intercept=0,slope=1.0,linetype="dashed",colour="blue") + xlim(0,NA) + ylim(0,NA) + ggtitle(label=paste("p.adjust (BH) < ",as.character(gofdr),sep=""))
print(plottt)


# x=p.adjust, y=qvalue
plottt <- table_ego_BP %>% ggplot(aes(x=p.adjust, y=qvalue, size=Count)) + geom_point()+geom_abline(intercept=0,slope=1.0,linetype="dashed",colour="blue") + xlim(0,NA) + ylim(0,NA) + ggtitle(label=paste("p.adjust (BH) < ",as.character(gofdr),sep=""))
print(plottt)



## pvalue < qvalue < p.adjust ##
# pvalue < p.adjust
# pvalue < qvalue
# qvalue < p.adjust

#---------------------#

#[BBRB-seq_0438_QC_tmpl_v6_noumi_190515-H3mm18KO_CTX_S2-Day0_S3_fdr0p2ver_and_LRT_191024  (umi補正なし,fdr0.2) (TPM 190722ver) (190924を元に) (190627-1024)]を参考にした。

#pvalueCutoff   
#pvalue cutoff on enrichment tests to report

#pAdjustMethod  
#one of "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"

#qvalueCutoff   
#qvalue cutoff on enrichment tests to report as significant. Tests must pass i) pvalueCutoff on unadjusted pvalues, ii) pvalueCutoff on adjusted pvalues and iii) qvalueCutoff on qvalues to be reported.

# 設定(pvalueCutoff  = 0.1, qvalueCutoff  = 0.2)だと、p値<0.1, p.adjust値<0.1, q値<0.2 になっている。

GOここまで

この後に以前はfantom5のデータのコードが入っていたが、カット。 mouse CTX (BRB 0438) の結果との比較もカット

---
title: "[Last 200811, Final 191205-1212, 18project, 3T3] BRBseq0432lane2_QC_tmpl_v6_noumi_H3mm18_Dox_linear_0718_fin191205_last200811 (umi補正なし,fdr0.1) (TPM,尤度比検定)"
output:
  html_notebook: 
    toc: yes
  pdf_document: 
    keep_tex: yes
    latex_engine: lualatex
---

### Setup

```{r libraries,message=FALSE}
library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
source("/home/guestA/n70275b/work/rscripts/geomNorm.R")

# Helper function
#ggpoints <- function(x,...) 
#  ggplot(x,...) + geom_point(size=3,stroke=1) +
#  ggrepel::geom_text_repel(size=4) + theme_minimal() + mycolor

## ラベルあり
ggpoints <- function(x,...) 
  ggplot(x,...) + geom_point(stroke=1) +
  ggrepel::geom_text_repel(size=4) + theme_minimal() + mycolor

## ラベルなし
#ggpoints <- function(x,...) 
#  ggplot(x,...) + geom_point(stroke=1) + theme_minimal() + mycolor

print(sessionInfo(),locale=FALSE)

select <- dplyr::select
count <- dplyr::count
rename <- dplyr::rename
```

### Parameters

*modify here*

```{r params}
# Files


#deftable <- "~/akuwakado/kuwakado/BRBSeq/H3mm18_Dox_0432lane2/Final_Rserver_191203/deftable_BRB_noumi_new_190520_fin191205ver.txt" #Umi補正なし (BRB)

deftable <- "/home/guestA/o70578a/akuwakado/kuwakado/BRBSeq/H3mm18_Dox_0432lane2/Final_Last_Rserver_200811/deftable_BRB_noumi_new_190520_Last20200811ver.txt"

#deftable <- "~/akuwakado/kuwakado/BRBSeq/H3mm18_Dox_0432lane2/Final_Rserver_191203/deftable_BRB_noumi_new_190520_fin191205ver.txt" #Umi補正なし (BRB)


#deftable <- "deftable_BRB_noumi_new_190520.txt" #Umi補正なし (BRB)

#deftable <- "~/akuwakado/kuwakado/BRBSeq/H3mm18_Dox_0432lane2/deftable_BRB_noumi_new_190520.txt" #Umi補正なし (BRB)


## Data selection (filter rows of deftable)
#use <- quo(!grepl("^18",group) & (group != "Nc-minusTryd"))
#use <- quo(TRUE) # use all
use <- quo(type != "C2C12")

# Species specific parameters
species <- "Mus musculus"
biomartann <- "mmusculus_gene_ensembl"
maxchrom <- 19 # 19: mouse, 22: human


# Graphics
# aesthetic mapping of labels
#myaes <- aes(colour=enzyme,shape=leg,label=rep) 
#myaes <- aes(colour=growth,shape=type,size=count) #ラベルなし
#myaes <- aes(colour=growth,shape=type,label=replicate,size=count) #ラベルあり
#myaes <- aes(colour=enzyme,shape=leg,label=replicate) #ラベルあり
#myaes <- aes(colour=enzyme,shape=leg,label=factor(rep))
#myaes <- aes(colour=type, shape=revcro, label=read, size=count)
#myaes <- aes(colour=type, shape=revcro, label=read)

#myaes <- aes(colour=growth,shape=type,label=replicate,size=count) #ラベルあり
#myaes <- aes(colour=time,shape=type,size=count,label=replicate)
#myaes <- aes(colour=WT_KO_intact_CTX, shape=Day,size=count,label=f_m)

#myaes <- aes(colour=WT_KO_intact_CTX, shape=Day, label=f_m) #サイズを変えず
#myaes <- aes(colour=growth,shape=type,label=replicate,size=count) #ラベルあり
myaes <- aes(colour=time,shape=type,label=rep,size=count) #ラベルあり
myaes2 <- aes(colour=time,shape=type) #kuwa add

# color palette of points: See vignette("ggsci")
mycolor <- ggsci::scale_color_aaas()



# PCA/UMAP
scalerows <- TRUE # gene-wise scaling (pattern is the matter?)
ntop <- 500 # number of top-n genes with high variance
seed <- 123 # set another number if UMAP looks not good
n_nei <- 6  # number of neighboring data points in UMAP #ここをどうしたらいい？


# DESeq2
#model <- ~groupn+lead #dateも追加
#model <- ~leg + enzyme + leg:enzyme
#model <- ~type+growth#+type:growth
#model <- ~group+lead


#model <- ~group
#model <- ~type+growth+type:growth #これでは相互作用が入っていない
#model <- ~type+growth #これでは相互作用が入っていない


model <- ~group
#model <- ~type+growth+growth:type

fdr <- 0.1 # acceptable false discovery rate
lfcthreth <- log2(1) # threshold in abs(log2FC)

# controls should be placed in the right
contrast <- list(
  
  group_UI_Doxplus_vs_minus = c("group", "BRB_UI_DoxPlus", "BRB_UI_DoxMinus"),
  group_0h_Doxplus_vs_minus = c("group", "BRB_0h_DoxPlus", "BRB_0h_DoxMinus"),
  group_24h_Doxplus_vs_minus = c("group", "BRB_24h_DoxPlus", "BRB_24h_DoxMinus"),
  group_48h_Doxplus_vs_minus = c("group", "BRB_48h_DoxPlus", "BRB_48h_DoxMinus")
  
  
  #group_UI_Doxplus_vs_minus = c("group", "Doxplus_UI", "Doxminus_UI"),
  #group_Diff0h_Doxplus_vs_minus = c("group", "Doxplus_Diff0h", "Doxminus_Diff0h"),
  #group_Diff24h_Doxplus_vs_minus = c("group", "Doxplus_Diff24h", "Doxminus_Diff24h"),
  #group_Diff48h_Doxplus_vs_minus = c("group", "Doxplus_Diff48h", "Doxminus_Diff48h")
  
  
  #Intercept = list("Intercept"), # reference level
  #leg_LvsR = c("leg", "L", "R"),
  #enz_KvsC = c("enzyme","K","C")
  #legL.enzK = list("legL.enzymeK") # interaction
  
  #type_Doxplus_vs_minus = c("type", "Doxplus", "Doxminus")
)
```



### Retrieve Biomart

```{r biomart, cache=TRUE}
if(!exists("e2g")){
  #ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",host="asia.ensembl.org")
  #ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",host="uswest.ensembl.org")
  ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",host="useast.ensembl.org")
  mart <- biomaRt::useDataset(biomartann,mart=ensembl)
  e2g <- biomaRt::getBM(attributes=c("ensembl_gene_id","external_gene_name",
    "gene_biotype","chromosome_name"), mart=mart) %>% as_tibble %>%
  rename(
    ens_gene = ensembl_gene_id,
    ext_gene = external_gene_name,
    biotype = gene_biotype,
    chr = chromosome_name
  )
}
annotate <- partial(right_join,e2g,by="ens_gene")

#-----#
nrow(e2g)
#readr::write_csv(e2g,"ensemble_list_asia.csv")
#readr::write_csv(e2g,"ensemble_list_uswest.csv")
readr::write_csv(e2g,"ensemble_list_useast.csv")
```

### Load counts

```{r loadUMI}
def <- readr::read_tsv(deftable) %>% filter(!!use)
print(def)

#def$growth <- factor(def$growth,levels =c("UI","Diff0h","Diff24h","Diff48h"))
#def$type <- factor(def$type,levels =c("Doxminus","Doxplus"))

#factor(def$growth,levels =c("UI","Diff0h","Diff24h","Diff48h"))
# [1] UI      UI      UI      UI      UI      UI      UI      UI      Diff0h  Diff0h  Diff0h  Diff0h  Diff0h  Diff0h  Diff0h  Diff0h  Diff24h Diff24h Diff24h Diff24h
#[21] Diff24h Diff24h Diff24h Diff24h Diff48h Diff48h Diff48h Diff48h Diff48h Diff48h Diff48h Diff48h
#Levels: UI Diff0h Diff24h Diff48h


####--- New ---#### (no UMI ?)
# Set reference levels according to the contrast
for(x in keep(contrast,is.character))
  def[[x[1]]] <- relevel(factor(def[[x[1]]]),x[3])

umi <- def$file %>% unique %>% tibble(file=.) %>% 
  dplyr::mutate(data=map(file,readr::read_tsv,progress=FALSE)) %>%
  unnest() %>% dplyr::rename(barcode=cell) %>%
  dplyr::inner_join(select(def,file,barcode,sample),.,c("file","barcode")) %>%
  select(-file,-barcode) %>% dplyr::rename(ens_gene=gene)

print(umi)

## sample, barcode, file を忘れずに！

mat <- umi %>% annotate %>%
  dplyr::mutate(chr=factor(chr,c(1:maxchrom,"X","Y","MT"))) %>%
  filter(!is.na(chr)) %>% spread(sample,count,fill=0)

## to check read vias, this add read number as "n" column (2019/4/19)
def <- umi %>% count(sample,wt=count) %>% dplyr::inner_join(def,.) %>% dplyr::rename(count=n)
####-----------#### 




# Set reference levels according to the contrast
#for(x in keep(contrast,is.character))
#  def[[x[1]]] <- relevel(factor(def[[x[1]]]),x[3])

#umi <- def$file %>% unique %>% tibble(file=.) %>% 
#  mutate(data=map(file,readr::read_tsv,progress=FALSE)) %>%
#  unnest() %>% dplyr::rename(barcode=cell) %>%
#  inner_join(select(def,file,barcode,sample),.,c("file","barcode")) %>%
#  select(-file,-barcode) %>% dplyr::rename(ens_gene=gene)

#mat <- umi %>% annotate %>%
#  mutate(chr=factor(chr,c(1:maxchrom,"X","Y","MT"))) %>%
#  filter(!is.na(chr)) %>% spread(sample,count,fill=0)

print(mat)

## to check read vias, this add read number as "n" column (2019/4/19)
#def <- umi %>% count(sample,wt=count) %>% inner_join(def,.) %>% dplyr::rename(count=n)

print(def)


##====================================##
# 確認 (20191204) ２つの値は一緒か？
# 生のデータカウント中の遺伝子総数

umi %>% group_by(ens_gene) %>% summarize %>% nrow()

umi %>% spread(sample,count,fill=0) %>% nrow()

mat %>% nrow()
mat %>% filter(chr!="MT") %>% nrow() # MTなし

# matでは、chr等が不明なものは省いている。
# DEGでは、さらにMTも省いている。
##====================================##

```

### Reads breakdown

#### Total reads

```{r totalReads, fig.width=7,fig.height=5}
bychr <- mat %>% select(-(1:3)) %>%
  gather("sample","count",-chr) %>%
  group_by(chr,sample) %>% summarise(total=sum(count)) %>% ungroup

ggplot(bychr,aes(reorder(sample,dplyr::desc(sample)),total/1e6,fill=chr)) +
  theme_linedraw() + geom_bar(stat="identity") + coord_flip() +
  xlab("sample") + ylab("million reads") + ggsci::scale_fill_igv() +
  scale_x_discrete(limits = rev(levels(sample)))


```

#### Biotype

```{r biotype,fig.width=8,fig.height=7}
bt <- mat %>% select(-c(1,2,4)) %>% group_by(biotype) %>%
  summarise_all(sum) %>% filter_at(-1,any_vars(. > 1000))
bt %>% tibble::column_to_rownames("biotype") %>%
  as.matrix %>% t %>% mosaicplot(las=2,shade=TRUE)
```

### Correlations

drop rows with all 0 -> +1/2 -> geom.scale -> log -> Pearson's

```{r makemat, fig.width=8,fig.height=7}
matf <- mat %>% filter(chr!="MT") %>% filter_at(-(1:4),any_vars(. > 0))
X <- matf %>% select(-(1:4)) %>% as.matrix
rownames(X) <- matf$ens_gene
lX <- log(gscale(X+0.5))
R <- cor(lX); diag(R) <- NA
pheatmap::pheatmap(R,color=viridis::viridis(256))
```

### Dimension reduction

```{r PCA,fig.width=4,fig.height=3}
# set scale=TRUE if the patterns (not level) is the matter
p <- prcomp(t(lX[rank(-apply(lX,1,var)) <= ntop,]),scale=scalerows,center=TRUE)
screeplot(p,las=2,main="Importance")
print(summary(p)$imp[,seq(min(10,ncol(X)))])
```

```{r makescoreDF}
label <- def %>% filter(sample %in% colnames(X))
df <- data.frame(p$x) %>% as_tibble(rownames="sample") %>%
  inner_join(label,.) %>% select(-file)

print(df)
```

```{r proximity,fig.width=6,fig.height=4}
ggpoints(df,modifyList(aes(PC1,PC2),myaes))
set.seed(seed)
um <- uwot::umap(p$x,n_nei,2)
df <- as_tibble(um) %>% rename(UMAP1=V1,UMAP2=V2) %>% bind_cols(df)
ggpoints(df,modifyList(aes(UMAP1,UMAP2),myaes))

print(df)

##  kuwakado 変更 ##
ggpoints <- function(x,...) 
  ggplot(x,...) + geom_point(stroke=1) + theme_minimal() + mycolor

#ggpoints(df,modifyList(aes(PC1,PC2),myaes2))
#set.seed(seed)
#um <- uwot::umap(p$x,n_nei,2)
#df <- as_tibble(um) %>% rename(UMAP1=V1,UMAP2=V2) %>% bind_cols(df)
#ggpoints(df,modifyList(aes(UMAP1,UMAP2),myaes2))
## ## ## ##
```

### DESeq2

#### Fit model

```{r deseq2}
dds <- DESeq2::DESeqDataSetFromMatrix(X[,label$sample],label,model)
dds <- DESeq2::DESeq(dds)


#=====#

dds <- DESeq2::estimateSizeFactors(dds)
norm <- DESeq2::counts(dds,normalized=TRUE) #DEGを取った後のクラスタリングに使う。

normalizedcount <- as.data.frame(norm) %>% tibble::rownames_to_column("ens_gene") %>% as_tibble
readr::write_csv(normalizedcount, "./H3mm18KO_3T3_Dox_normCount.csv")

normalizedcount %>% inner_join(e2g, by = "ens_gene")  %>% dplyr::select("ens_gene","ext_gene", "biotype","chr", all_of(label$sample)) %>% readr::write_csv("./H3mm18KO_3T3_Dox_normCount_genename.csv")

#count_dds <- estimateSizeFactors(dds)
#counts(count_dds, normalized=TRUE)

####--- + size factors を書き出し ------------------####
as.data.frame(DESeq2::sizeFactors(dds))  %>% tibble::rownames_to_column("sample") %>% readr::write_csv("./H3mm18KO_3T3_Dox__sizefactors.csv")



```


vst => z score

```{r zscore 200811add}

vsd <- DESeq2::vst(dds) #normalized countが入っている。(vstかrlog)
Xd <- SummarizedExperiment::assay(vsd) # 全て選択(200326) 20190920を元に (191024)
Xs <- Xd %>% t %>% scale %>% t

zscore <- Xs %>% as.data.frame() %>% tibble::rownames_to_column("ens_gene") %>% as_tibble
zscore_type <- zscore  %>% annotate %>% dplyr::select("ens_gene","ext_gene", "biotype","chr", all_of(label$sample))



readr::write_csv(zscore, "H3mm18KO_3T3_Dox__zscore_all.csv")
readr::write_csv(zscore_type, "H3mm18KO_3T3_Dox__zscore_type_all.csv")

nrow(zscore_type)

```
#### Diagnostics plot

```{r diagnostics,fig.width=7,fig.height=5}
DESeq2::sizeFactors(dds) %>%
  {tibble(sample=names(.),sizeFactor=.)} %>%
  ggplot(aes(sample,sizeFactor)) + theme_minimal() +
  geom_bar(stat="identity") + coord_flip()
DESeq2::plotDispEsts(dds)
```


#### Extract results
191205修正

```{r extractRes}
res <- mapply(function(x)
  DESeq2::results(dds,x,lfcThreshold=lfcthreth,alpha=fdr)
,contrast)

print(fdr)


# 200811修正
re_all <- map(res,as_tibble,rownames="ens_gene") %>%
  tibble(aspect=factor(names(.),names(.)),data=.) %>%
  mutate(data=map(data,annotate)) %>%
  unnest(cols = "data")

re <- re_all %>% filter(padj<fdr) #191120修正 unnest() 


#re <- map(res,as_tibble,rownames="ens_gene") %>%
#  tibble(aspect=factor(names(.),names(.)),data=.) %>%
#  mutate(data=map(data,annotate)) %>%
#  unnest(cols = "data") %>% filter(padj<fdr) #191120修正 unnest() 

fc <- re %>% select(1:7) %>% spread(aspect,log2FoldChange,fill=0)

imap(res,~{
  cat(paste0("-- ",.y," --"))
  DESeq2::summary(.x) #191120修正 DESeq2::summary.DESeqResults(.x)
}) %>% invisible
```

### GSEA

```{r GSEA, warning=FALSE}
msig <- msigdbr::msigdbr(species)
fgsea_msig <- partial(fgsea::fgsea,with(msig,split(gene_symbol,gs_name)))
gscat <- msig %>% select(gs_name,gs_cat,gs_subcat) %>%
  distinct() %>% rename(pathway=gs_name)

#gsea <- re %>% filter(aspect!="Intercept") %>% group_by(aspect) %>%
#  summarise(l2fc=list(setNames(log2FoldChange,ext_gene))) %>%
#  mutate(gse=map(l2fc,fgsea_msig,nperm=10000,maxSize=500)) %>%
#  select(-l2fc) %>% unnest %>% arrange(-NES) %>% right_join(gscat,.) %>%
#  mutate(leadingEdge=map_chr(leadingEdge,paste,collapse=","))

# gsea 修正ver [20190621]
#gsea <- re %>% filter(aspect!="Intercept") %>% group_by(aspect) %>%
#  summarise(l2fc=list(setNames(log2FoldChange,ext_gene))) %>%
#  mutate(gse=map(l2fc,fgsea_msig,nperm=10000,maxSize=500)) %>%
#  select(-l2fc) %>% unnest %>% arrange(-NES) %>% right_join(gscat,.) %>%
#  mutate(leadingEdge=map_chr(leadingEdge,paste,collapse=",")) %>%
#  group_by(aspect,gs_cat,gs_subcat) %>%
#  mutate(padj=p.adjust(pval,"BH")) %>% ungroup()

# gsea 修正ver [20190627]
gsea <- re %>% filter(aspect!="Intercept") %>% group_by(aspect) %>%
  summarise(l2fc=list(setNames(log2FoldChange,ext_gene))) %>%
  filter(map(l2fc,length)>10) %>%
  mutate(gse=map(l2fc,fgsea_msig,nperm=10000,maxSize=500)) %>%
  select(-l2fc) %>% unnest %>% arrange(-NES) %>% right_join(gscat,.) %>%
  mutate(leadingEdge=map_chr(leadingEdge,paste,collapse=",")) %>%
  group_by(aspect,gs_cat,gs_subcat) %>%
  mutate(padj=p.adjust(pval,"BH")) %>% ungroup()

```

```{r Hallmark, fig.width=8, fig.height=6}
hallmark <- gsea %>% filter(gs_cat=="H") %>%
  mutate(pathway=sub("^HALLMARK_","",pathway)) %>% 
  group_by(aspect) %>% nest %>%
  mutate(plt=map2(data,aspect,~
    ggplot(.x,aes(reorder(pathway,NES),NES,fill=padj<0.1)) +
    ggtitle(.y) + xlab("Hallmark gene sets") +
    geom_bar(stat="identity") + theme_minimal() + coord_flip() +
    theme(legend.position = "none") + ggsci::scale_fill_aaas())
  )
print(hallmark$plt)
```

See MSigDB Collections: <http://software.broadinstitute.org/gsea/msigdb/collections.jsp>

### Write-out tables

```{r writeout}

#csvfilepath <- "BRB0432lane2noumi_H3mm18_Dox"

if(exists("fc"))   readr::write_csv(fc,"./2gun/BRB0432lane2noumi_H3mm18_Dox_l2fc_fdr0p1__final191205_last200811.csv")
if(exists("re"))   readr::write_csv(re,"./2gun/BRB0432lane2noumi_H3mm18_Dox_results_fdr0p1__final191205_last200811.csv")
if(exists("re_all"))   readr::write_csv(re_all,"./2gun/BRB0432lane2noumi_H3mm18_Dox_resultsall_fdr0p1__final191205_last200811.csv")
if(exists("gsea")) readr::write_csv(gsea,"./2gun/BRB0432lane2noumi_H3mm18_Dox_gsea_fdr0p1__final191205_last200811.csv")
```



### Write-out tables Hallmark

```{r writeout Hallmark}
#gseaのHallmarkのみ書き出し
hallmark_gsea <- gsea %>% filter(gs_cat=="H") %>% mutate(pathway=sub("^HALLMARK_","",pathway)) %>% group_by(aspect)
if(exists("hallmark_gsea")) readr::write_csv(hallmark_gsea,"./2gun/BRB0432lane2noumi_H3mm18_Dox_hallmark_gsea_fdr0p1__final191205_last200811.csv")
```

### MAplot

5*7 inch

```{r maplot}

#maplot <- DESeq2::plotMA(res$group_UI_Doxplus_vs_minus, ylim=c(-2,2))
#print(maplot)
maplot <- DESeq2::plotMA(res$group_UI_Doxplus_vs_minus, ylim=c(-4,4), main="UI_Doxplus_vs_minus")
print(maplot)

#maplot <- DESeq2::plotMA(res$group_Diff0h_Doxplus_vs_minus , ylim=c(-2,2))
#print(maplot)
maplot <- DESeq2::plotMA(res$group_0h_Doxplus_vs_minus, ylim=c(-4,4), main="0h_Doxplus_vs_minus")
print(maplot)

#maplot <- DESeq2::plotMA(res$group_Diff24h_Doxplus_vs_minus, ylim=c(-2,2))
#print(maplot)
maplot <- DESeq2::plotMA(res$group_24h_Doxplus_vs_minus, ylim=c(-4,4), main="24h_Doxplus_vs_minus")
print(maplot)

#maplot <- DESeq2::plotMA(res$group_Diff48h_Doxplus_vs_minus, ylim=c(-2,2))
#print(maplot)
maplot <- DESeq2::plotMA(res$group_48h_Doxplus_vs_minus, ylim=c(-4,4), main="48h_Doxplus_vs_minus")
print(maplot)

```

## 尤度比検定

###Fit model LRT (BRBと同じ設定)
ATACのフォーマットを持ってきた

```{r deseq2 2}
def_list_select <- def %>% mutate(time=factor(time, c("UI","0h","24h","48h"))) %>% mutate(type=factor(type, c("DoxMinus","DoxPlus")))

#def_list_select <- def %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxminus","Doxplus")))

#def_list_select <- def

dds0 <- 0
dds1_2 <- 0
res1_2 <- 0


model_full <- ~growth*type # BRB
model_reduced <- ~growth # BRB

#model_full <- ~time*type # full model  (~growth*type # BRB)
#model_reduced <- ~time # reduced model (~growth # BRB)

colnames(X) #使用するサンプル

# full model
dds0_LRT <- DESeq2::DESeqDataSetFromMatrix(X[,def_list_select$sample],def_list_select,model_full)  #full model

# reduced model
dds_LRT <- DESeq2::DESeq(dds0_LRT, test="LRT", reduced=model_reduced) #reduced model
res_LRT <- DESeq2::results(dds_LRT)
res_LRT_fdr0p1 <- DESeq2::results(dds_LRT)  #クラスタリングに使用
res_LRT_fdr0p2 <- DESeq2::results(dds_LRT,alpha=0.2)

print(model.matrix(model_full, def_list_select)) #full model
print(model.matrix(model_reduced, def_list_select)) #reduced model

head(res_LRT[order(res_LRT$pvalue), ])
DESeq2::summary(res_LRT_fdr0p1) #20191108修正
DESeq2::summary(res_LRT_fdr0p2) #20191108修正
#DESeq2::summary.DESeqResults(res_LRT_fdr0p1)
#DESeq2::summary.DESeqResults(res_LRT_fdr0p2)

```

### 尤度比検定 結果

#### result LRT
```{r result LRT}

folder_name_plot_path <- "./LRT/"
csvfilepath <- "BRB0432lane2noumi_H3mm18_Dox"

allres_LRT <- 0

#----- 全ての結果 -----#
allres_LRT <- as_tibble(res_LRT,rownames="ens_gene") %>% right_join(e2g,.)
file_path <- paste(folder_name_plot_path, "all__", csvfilepath, "_last200811.csv",sep="")
readr::write_csv(allres_LRT, file_path) # 全ての結果

nrow(allres_LRT)
nrow(allres_LRT %>% filter(padj<0.1))
nrow(allres_LRT %>% filter(padj<0.2))

#----- fdr 0.1の結果 -----#
LRT_deglist_fdr0p1 <- as_tibble(res_LRT_fdr0p1,rownames="ens_gene") %>% right_join(e2g,.) %>% filter(padj<0.1) #クラスタリングに使用
file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_last200811.csv",sep="") # 今回取得したDEGのリスト
readr::write_csv(LRT_deglist_fdr0p1, file_path) # 全ての結果
nrow(LRT_deglist_fdr0p1)

#----- fdr 0.2の結果 -----#
LRT_deglist_fdr0p2 <- as_tibble(res_LRT_fdr0p2,rownames="ens_gene") %>% right_join(e2g,.) %>% filter(padj<0.2)
file_path <- paste(folder_name_plot_path, "DEG_fdr0p2__", csvfilepath, "_last200811.csv",sep="") # 今回取得したDEGのリスト
readr::write_csv(LRT_deglist_fdr0p2, file_path) # 全ての結果
nrow(LRT_deglist_fdr0p2)
LRT_deglist_fdr0p2 <- NA

# BRBでは right_join(e2g,.)、ATACでは right_join(ensemble,.)

```

#### Clustering LRT


```{r clustering fdr 0p1 kemans4}
#20191205修正と作成

# 全てのデータ

#-- 上で実行 --#
#dds_LRT <- DESeq2::DESeq(dds0_LRT, test="LRT", reduced=model_reduced) #reduced model
#res_LRT <- DESeq2::results(dds_LRT)
#res_LRT_fdr0p1 <- DESeq2::results(dds_LRT)
#res_LRT_fdr0p2 <- DESeq2::results(dds_LRT,alpha=0.2)
#--------------#

#vsd_LRT <- DESeq2::vst(dds_LRT)

#Xd_LRT <- SummarizedExperiment::assay(vsd_LRT)[which(res_LRT_fdr0p1$padj<0.1),] #degのリスト #Xd <- SummarizedExperiment::assay(vsd)[which(res$padj<0.1),]
#Xs_LRT <- Xd_LRT %>% t %>% scale %>% t

#-- クラスタリングに使用したXd,Xsを書き出し --#
# 20191212作成 (あとで確認できるように)
# LRT_deglist_fdr0p1

#file_path <- paste(folder_name_plot_path, "clustering_vsdLRTall__", csvfilepath, ".csv",sep="") 
#SummarizedExperiment::assay(vsd_LRT) %>% as_tibble(rownames="ens_gene") %>% readr::write_csv(.,file_path)

#file_path <- paste(folder_name_plot_path, "clustering_XdLRTfdr0p1__", csvfilepath, ".csv",sep="") 
#Xd_LRT %>% as_tibble(rownames="ens_gene") %>% readr::write_csv(.,file_path) 
# これと同じ： SummarizedExperiment::assay(vsd_LRT) %>% as_tibble(rownames="ens_gene")  %>% filter(ens_gene %in% LRT_deglist_fdr0p1$ens_gene)

#--------#

#file_path <- paste(folder_name_plot_path, "clustering_XsLRTall__", csvfilepath, ".csv",sep="")
#SummarizedExperiment::assay(vsd_LRT) %>% t %>% scale %>% t %>% as_tibble(rownames="ens_gene") %>% readr::write_csv(.,file_path)

#file_path <- paste(folder_name_plot_path, "clustering_XsLRTfdr0p1__", csvfilepath, ".csv",sep="") 
#Xs_LRT %>% as_tibble(rownames="ens_gene") %>% readr::write_csv(.,file_path)
# これと同じ： SummarizedExperiment::assay(vsd_LRT) %>% t %>% scale %>% t %>% as_tibble(rownames="ens_gene") %>% filter(ens_gene %in% LRT_deglist_fdr0p1$ens_gene)
#---------------------------------------------#


zscore_type_LRT <- zscore_type %>% filter(ens_gene %in% LRT_deglist_fdr0p1$ens_gene)
nrow(zscore_type_LRT)

Xs_LRT <- zscore_type_LRT %>% dplyr::select(-ens_gene,-ext_gene, -biotype,-chr) %>% as.matrix()
rownames(Xs_LRT) <- zscore_type_LRT$ens_gene



##--------- clustering -----------#
set.seed(3)


km_LRT <- kmeans(Xs_LRT,4,nstart = 25,algorithm = "Lloyd")
kmc_LRT <- km_LRT$centers %>% as_tibble(rownames="cluster") %>% gather(sample,val,-cluster) %>% inner_join(def)

#kmc_LRT_group <- kmc_LRT %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxplus","Doxminus")))
#kmc_LRT_group <- kmc_LRT_group %>% mutate(time=case_when(growth=="UI" ~"UI",growth=="Diff0h"~"0h",growth=="Diff24h"~"24h",growth=="Diff48h"~"48h",TRUE~"error"))
#kmc_LRT_group <- kmc_LRT_group %>% mutate(time=factor(time, c("UI","0h","24h","48h")))

kmc_LRT_group <- kmc_LRT %>% mutate(time=factor(time, c("UI", "0h","24h","48h"))) %>% mutate(type=factor(type,c("DoxPlus","DoxMinus"))) %>% mutate(rep=factor(rep, c("1", "2", "3", "4")))


gggglabel <- paste("3T3 Dox +-, k-means","[1]",km_LRT$size[1],"[2]",km_LRT$size[2],"[3]",km_LRT$size[3],"[4]",km_LRT$size[4],sep=" ")


#------- size -------#

print(km_LRT$size)  #4つのクラスター [1] 47 55 94 33

rrres_LRT <- km_LRT$cluster %>% tibble(ens_gene=names(.),cluster=.) %>% right_join(e2g,.) %>% arrange(cluster)

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__kmeans_cluster.csv",sep="") 
readr::write_csv(rrres_LRT,file_path)

##------- PCA -------#

pcacluster_save <- prcomp(Xs_LRT)$x %>% as_tibble %>% select(PC1,PC2) %>% mutate(cluster=km_LRT$cluster) %>% ggplot(aes(PC1,PC2,colour=factor(cluster)))+geom_point(size=1.5,alpha=0.6)+coord_fixed()+theme_linedraw()+ggsci::scale_color_d3("category20")

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__kmeans__pcacluster_PC1PC2.pdf",sep="") 
ggsave(plot=pcacluster_save,file=file_path, width = 10, height = 6, dpi = 120)
print(pcacluster_save)

pcacluster_save <- prcomp(Xs_LRT)$x %>% as_tibble %>% select(PC1,PC3) %>% mutate(cluster=km_LRT$cluster) %>% ggplot(aes(PC1,PC3,colour=factor(cluster)))+geom_point(size=1.5,alpha=0.6)+coord_fixed()+theme_linedraw()+ggsci::scale_color_d3("category20")

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__kmeans__pcacluster_PC1PC3.pdf",sep="") 
ggsave(plot=pcacluster_save,file=file_path, width = 10, height = 6, dpi = 120)
print(pcacluster_save)


#================================================#
# mouseCTX 0438を参考に。

#------------------#
f_cluster <- function(x) x %>% group_by(group, type, time, cluster) %>% summarise(avg=mean(val),se=sd(val)/sqrt(length(val))) %>% ungroup()
print(kmc_LRT_group %>% group_by(group, type, time) %>% summarise())

f_clusterp <- function(x) x %>% group_by(group, type, time, cluster) %>% summarise(avg=mean(val),se=sd(val)/sqrt(length(val))) %>% ungroup()
print(kmc_LRT_group %>% group_by(group, type, time) %>% summarise()) #作図用

#-------#

cluster_save <- kmc_LRT_group %>%
ggplot(aes(time,val,group=type,colour=type))+geom_line(aes(x=time,y=avg,colour=type),data=f_cluster)+geom_point()+facet_wrap(~cluster,2)+ggsci::scale_color_npg()+theme_minimal()+ theme(strip.text = element_text(size = 12),axis.text.x = element_text(angle = 45, hjust = 1))  + ggtitle(gggglabel)

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__cluster_type.pdf",sep="") 
ggsave(plot=cluster_save,file=file_path, width = 6, height = 6, dpi = 120)
print(cluster_save)

#-------#
cluster_save <- kmc_LRT_group %>%
ggplot(aes(time,val,group=type,colour=type))+geom_point()+facet_wrap(~cluster,2)+ggsci::scale_color_npg()+theme_minimal()+ theme(strip.text = element_text(size = 12),axis.text.x = element_text(angle = 45, hjust = 1))  + ggtitle(gggglabel)


file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__cluster_type_nonline.pdf",sep="") 
ggsave(plot=cluster_save,file=file_path, width = 6, height = 6, dpi = 120)
#------------------#
#================================================#



##--------- リストを保存 -------------#
## degの情報も付け加える (20191204 ver)。

#---- LRTの情報 baseMean等を取り出す ----#
#LRT_deglist_fdr0p1 <- as_tibble(res_LRT_fdr0p1,rownames="ens_gene") %>% right_join(e2g,.) %>% filter(padj<0.1) #クラスタリングに使用
#LRT_deglist_fdr <- as_tibble(res_LRT,rownames="ens_gene") %>% filter(padj<0.1)

LRT_deglist_fdr <- LRT_deglist_fdr0p1 %>% select(-(ext_gene),-(biotype),-(chr))
print(fdr)
nrow(LRT_deglist_fdr0p1)
#------------------------#

arrange_rrres_LRT <- rrres_LRT %>% arrange(ens_gene)
cluster_resLRT_fdr <- full_join(arrange_rrres_LRT, LRT_deglist_fdr, by="ens_gene") %>% arrange(cluster)
nrow(cluster_resLRT_fdr)

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__cluster_result.csv",sep="")
readr::write_csv(cluster_resLRT_fdr,file_path)

#-- 確認 --#
arrange_rrres_LRT %>% filter(ext_gene %in% c("Myh3","Ckm","Acta1","Tnnt2","Actb","Csrp3","Tpm2","Nsdhl"))
##------------------------------------#

```




```{r paper fig}
#20191205修正、作成

#===============================#
# mouseCTX 0438を参考に。

# strip.text.x = element_text(size=24,face="italic")
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合

cluster_save <- kmc_LRT_group %>% ggplot(aes(time,val,group=type,colour=type))+geom_line(aes(x=time,y=avg,colour=type),data=f_clusterp)+geom_point(size=2)+facet_wrap(~cluster,1) +theme_bw() + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24), legend.position = "top",  plot.title=element_text(size=10)) +ggsci::scale_color_npg() + ggtitle(gggglabel)

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__cluster_type__final.pdf",sep="") 
ggsave(plot=cluster_save,file=file_path, width = 12, height = 6, dpi = 120)
print(cluster_save)
#------------------#

cluster_save <- kmc_LRT_group %>% ggplot(aes(time,val,group=type,colour=type)) + geom_abline(intercept=0,slope=0,linetype="dashed",colour="gray") + geom_line(aes(x=time,y=avg,colour=type),data=f_clusterp) + geom_point(size=2)+facet_wrap(~cluster,1) +theme_bw() + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24), legend.position = "top",  plot.title=element_text(size=10)) +ggsci::scale_color_npg() + ggtitle(gggglabel)


file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__cluster_type__final_dash.pdf",sep="") 
ggsave(plot=cluster_save,file=file_path, width = 12, height = 6, dpi = 120)
print(cluster_save)
#------------------#

```


### TPM plot

(CTX day5 DEG Up Down ver)

#### calculate TPM

```{r TPM Matome def 0722}
# 2019 12月修正
# カウント0も表示するように変更 (20190722) BRB-seq_0432lane2_QC_tmpl_v6r190722_noumi_H3mm18_Dox_linear_0722 より

tpm <- umi %>% group_by(sample) %>% mutate(sample_total=sum(count),TPM=count/sum(count)*1e6) %>% ungroup
tpm_zero <- tpm %>% select(sample,ens_gene,TPM) %>% spread(sample,TPM,fill=0) %>% gather(sample, TPM, -ens_gene) #カウント0のサンプルは0を入れる 20190722追加して修正

tpm_def <- def %>% select(-count) %>% dplyr::inner_join(tpm_zero, by = "sample") #tpmをtpm_zeroに修正、20190722修正

#f <- function(x) x %>% group_by(group,type,growth,ext_gene) %>% summarise(avg=mean(TPM),se=sd(TPM)/sqrt(length(TPM))) %>% ungroup() #重要(平均を求める、geom_smooth(se=FALSE)を使わないver)

#-- 確認 --#
umi %>% group_by(sample) %>% summarise(sum(count)) # 確認
tpm_zero %>% group_by(sample) %>% summarise(sum(TPM)) # 確認


#---------#
matome <- tpm_def %>% inner_join(e2g, by = "ens_gene") #191203

readr::write_csv(matome,paste(csvfilepath, "__TPM__final191205_last200811.csv",sep="")) #191203 追加

#readr::write_csv(matome,"BRB0438noumi_190515-H3mm18KO_CTX_S2-Day0_S3_l2fc_fdr0p2ver__TPM__final191204.csv") #191203 追加


```


#### TPM fig

使用するものをプロット (191203修正)

"Acta1","Myh3","Ckm","Tnnt2","Actb","Csrp3"

```{r TPM fig error var & boxplot 190722-1203 select7}

#======== Change every data ここで順番を変更 ========#

#matome_select <- matome %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxplus","Doxminus")))
#matome_select <- matome_select %>% mutate(time=case_when(growth=="UI" ~"UI",growth=="Diff0h"~"0h",growth=="Diff24h"~"24h",growth=="Diff48h"~"48h",TRUE~"error"))
#matome_select <- matome_select %>% mutate(time=factor(time, c("UI","0h","24h","48h")))

matome_select <- matome %>% mutate(time=factor(time, c("UI", "0h","24h","48h"))) %>% mutate(type=factor(type,c("DoxPlus","DoxMinus"))) %>% mutate(rep=factor(rep, c("1", "2", "3", "4")))


#matome_select <- matome %>% filter(intact_CTX=="CTX"|intact_CTX=="SKM") %>%  mutate(WT_KO=factor(WT_KO, c("H3mm18KO","WT"))) %>% mutate(Day=factor(Day, c("Day0","Day5","Day14"))) %>% mutate(intact_CTX=factor(intact_CTX, c("CTX","SKM")))

#-------#

tbl <- matome_select %>% filter(ext_gene %in% c("Myh3","Ckm","Acta1","Tnnt2","Actb","Csrp3"))

#tbl2 <- matome_select %>% filter(ext_gene %in% c("Acta1","Tpm2"))

#====================================================#

#f <- function(x) x %>% group_by(WT_KO,Day,intact_CTX,ext_gene) %>% summarise(avg=mean(TPM),se=sd(TPM)/sqrt(length(TPM))) %>% ungroup() #重要 (CTX用に変更)

f <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(TPM),se=sd(TPM)/sqrt(length(TPM))) %>% ungroup()
#----#
tbl %>% group_by(group, type, time) %>% summarize()
#----#

#face="italic"
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合

### point ###
gggggpp <-  ggplot(tbl,aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",2)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=20,face="italic"), legend.position = "top",  plot.title=element_text(size=16))+ggsci::scale_color_npg()


file_path <- paste("TPM__", csvfilepath, "__with_point__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 10)

#ggsave(file=file_path, plot = gggggpp)
#ggsave(file="TPM__BRB0438noumi_H3mm18KO_CTX_S2-Day0_S3__with_point__final191204.pdf", plot = gggggpp)
print(gggggpp)

gggggpp <-  ggplot(tbl,aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",2)+geom_smooth(se=FALSE)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=20,face="italic"), legend.position = "top",  plot.title=element_text(size=16))+ggsci::scale_color_npg()

file_path <- paste("TPM__", csvfilepath, "__with_point__final191205_last200811_smooth.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 10)

#ggsave(file=file_path, plot = gggggpp)
#ggsave(file="TPM__BRB0438noumi_H3mm18KO_CTX_S2-Day0_S3__with_point__final191204_smooth.pdf", plot = gggggpp)
#print(gggggpp)


```

"Acta1","Myh3","Nsdhl"

```{r paper fig TPM select2}
#20191204 作成

tbl2 <- matome_select %>% filter(ext_gene %in% c("Acta1","Myh3","Nsdhl"))
#%>% mutate(ext_gene=factor(ext_gene, c("Acta1","Nsdhl","Myh3")))

#===============================#
## SKMもCTXと同じ色で表示

### point ###
gggggpp <-  ggplot(tbl2,aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y")+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24,face="italic"), legend.position = "top",  plot.title=element_text(size=20)) +ggsci::scale_color_npg()  + ggtitle("3T3 Dox +-")

file_path <- paste("TPM__", csvfilepath, "__with_point__Acta1_Myh3_Nsdhl__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 6) #2つ図なら width = 8
print(gggggpp)

#ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 8, height = 6) #2つ図なら width = 8
#ggsave(file="TPM__BRB0438noumi_H3mm18KO_CTX_S2-Day0_S3__with_point__Acta1_Tpm2__final191204.pdf", plot = gggggpp, dpi = 100, width = 8, height = 6)


```
"Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"

```{r paper fig TPM select3, fig.width=12, fig.height=5}
#20191204 作成

rrres_LRT %>% filter(ext_gene %in% c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))

nbl3 <- matome_select %>% filter(ext_gene %in% c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))  %>% mutate(ext_gene=factor(ext_gene,c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))) 


#===============================#
## SKMもCTXと同じ色で表示

### point ###
gggggpp <-  ggplot(nbl3,aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y")+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24,face="italic"), legend.position = "top",  plot.title=element_text(size=20)) +ggsci::scale_color_npg()  + ggtitle("3T3 Dox +-")

file_path <- paste("TPM__", csvfilepath, "__with_point__Cluste_Rep__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 6) #2つ図なら width = 8
print(gggggpp)

#ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 8, height = 6) #2つ図なら width = 8
#ggsave(file="TPM__BRB0438noumi_H3mm18KO_CTX_S2-Day0_S3__with_point__Acta1_Tpm2__final191204.pdf", plot = gggggpp, dpi = 100, width = 8, height = 6)


```

ALL DEG, LRT FDR < 0.1


```{r Matome TPM DEGfig cluster}
# DEGをclusterに分けて表示 (191206作成)

#-- 設定 -----#
plot_title1 <- "TPM"
plot_title2 <- "DEG (LRT): BRB-seq 3T3 Dox +-"
#folder_name <- "LRT"

#-- ファイル名 の設定 ---#
plot_title0 <- paste(plot_title1, plot_title2, sep=", ")
#folder_name_plot0 <- paste(".",folder_name, plot_title1,"",sep="/")
#folder_name_plot_path <- paste(folder_name_plot0,paste(folder_name,plot_title1,"",sep="_"),sep="")

#------------------------#

#===============================#

#----- 上で出てきたDEGのリストの取り出し -----#
deg_cluster_list <- rrres_LRT  %>% arrange(cluster)
deg_cluster_size <- deg_cluster_list %>% arrange(ens_gene) %>% group_by(cluster) %>% summarize(size=n())

#----- DEGのみ取り出す ------#
deg_matome <- matome %>% filter(ens_gene %in% deg_cluster_list$ens_gene) %>% right_join(deg_cluster_list %>% select(ens_gene,cluster), by = "ens_gene")
#deg_matome <- deg_matome %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxplus","Doxminus")))
#deg_matome <- deg_matome %>% mutate(time=case_when(growth=="UI" ~"UI",growth=="Diff0h"~"0h",growth=="Diff24h"~"24h",growth=="Diff48h"~"48h",TRUE~"error"))
#deg_matome <- deg_matome %>% mutate(time=factor(time, c("UI","0h","24h","48h")))


deg_matome <- deg_matome %>% mutate(time=factor(time, c("UI", "0h","24h","48h"))) %>% mutate(type=factor(type,c("DoxPlus","DoxMinus"))) %>% mutate(rep=factor(rep, c("1", "2", "3", "4")))


#------------------------------#

f <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(TPM),se=sd(TPM)/sqrt(length(TPM))) %>% ungroup()

#===============================#
### point ###
## クラスターごと ##
for (i in 1:4) {
  cluster_now <- paste("cluster",deg_cluster_size$cluster[i],sep="")
  plot_title0_cluster <- paste(plot_title0, cluster_now, deg_cluster_size$size[i], sep=", ") #クラスターの個数も表示(20191025)
  
  print(plot_title0_cluster)
  
  ppplotsize <- (as.integer(deg_cluster_size$size[i]/10) + 1) * 2 + 1
  
  cluster_plotsave <- deg_matome %>% filter(cluster==deg_cluster_size$cluster[i]) %>%  ggplot(aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(angle = 45, hjust = 1), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle(plot_title0_cluster)
  
  
  file_path <- paste(folder_name_plot_path, "TPM__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_",cluster_now,".pdf",sep="") 
  ggsave(plot=cluster_plotsave,file=file_path, width = 20, height = ppplotsize, dpi = 120, limitsize = FALSE)

}

#===============================#
### point ###
## まとめて ##

#-- 斜めのラベル --#

ppplotsize <- (as.integer(nrow(deg_cluster_list)/10) + 1) * 2 + 1

cluster_plotsave <- deg_matome %>% ggplot(aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(angle = 45, hjust = 1), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle("3T3 Dox +-")


file_path <- paste(folder_name_plot_path, "TPM__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_all_1.pdf",sep="") 
ggsave(plot=cluster_plotsave,file=file_path, width = 20, height = ppplotsize, dpi = 360, limitsize = FALSE)

#-- 横向きのラベル --#

ppplotsize <- ppplotsize * 1.25

cluster_plotsave <- deg_matome %>% ggplot(aes(time,TPM,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle("3T3 Dox +-")

file_path <- paste(folder_name_plot_path, "TPM__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_all_2.pdf",sep="") 
ggsave(plot=cluster_plotsave,file=file_path, width = 25, height = ppplotsize, dpi = 360, limitsize = FALSE)


#--memo--#
#face="italic"
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合

```

TPM作図終了


### normalizedcount plot

#### set normCount table

```{r normCount Matome}
# 2019 12月作成

nrow(normalizedcount)
nrow(normalizedcount %>% inner_join(e2g, by = "ens_gene"))

#---------#
norm_plotlist_all <- normalizedcount %>% gather("sample", "normalized",-(ens_gene)) %>% inner_join(def, by = "sample") %>% inner_join(e2g, by = "ens_gene") 
#norm_plotlist_all <- norm_plotlist_all %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxplus","Doxminus")))
#norm_plotlist_all <- norm_plotlist_all %>% mutate(time=case_when(growth=="UI" ~"UI",growth=="Diff0h"~"0h",growth=="Diff24h"~"24h",growth=="Diff48h"~"48h",TRUE~"error"))
#norm_plotlist_all <- norm_plotlist_all %>% mutate(time=factor(time, c("UI","0h","24h","48h")))

norm_plotlist_all <- norm_plotlist_all %>% mutate(time=factor(time, c("UI", "0h","24h","48h"))) %>% mutate(type=factor(type,c("DoxPlus","DoxMinus"))) %>% mutate(rep=factor(rep, c("1", "2", "3", "4")))

readr::write_csv(norm_plotlist_all,paste(csvfilepath, "__normCount__final191205_last200811.csv",sep="")) #191206 追加

```

#### normcount fig

ALL DEG, LRT FDR < 0.1 (Total 229)

```{r plot clustering by BRB normcount} 
# 20191205修正、20191024
# DEGをclusterに分けて表示 (191206作成)

#-- 設定 -----#
plot_title1 <- "normCount"
plot_title2 <- "DEG (LRT): BRB-seq 3T3 Dox +-"
#folder_name <- "LRT"

#-- ファイル名 の設定 ---#
plot_title0 <- paste(plot_title1, plot_title2, sep=", ")
#folder_name_plot0 <- paste(".",folder_name, plot_title1,"",sep="/")
#folder_name_plot_path <- paste(folder_name_plot0,paste(folder_name,plot_title1,"",sep="_"),sep="")

#------------------------#

#===============================#
## norm count 後のデータ

#----- 上で出てきたDEGのリストの取り出し -----#
#----- DEGのみ取り出す ------#
deg_normcount <- norm_plotlist_all %>% filter(ens_gene %in% deg_cluster_list$ens_gene) %>% right_join(deg_cluster_list %>% select(ens_gene,cluster), by = "ens_gene")
#deg_normcount <- deg_normcount %>% mutate(growth=factor(growth, c("UI","Diff0h","Diff24h","Diff48h"))) %>% mutate(type=factor(type, c("Doxplus","Doxminus")))
#deg_normcount <- deg_normcount %>% mutate(time=case_when(growth=="UI" ~"UI",growth=="Diff0h"~"0h",growth=="Diff24h"~"24h",growth=="Diff48h"~"48h",TRUE~"error"))
#deg_normcount <- deg_normcount %>% mutate(time=factor(time, c("UI","0h","24h","48h")))

#plotlist_cluster <- deg_normcount
#------------------------------#


f_gene_norm <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(normalized),se=sd(normalized)/sqrt(length(normalized))) %>% ungroup()
#f <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(TPM),se=sd(TPM)/sqrt(length(TPM))) %>% ungroup()

#===============================#
### point ###
## クラスターごと ##
for (i in 1:4) {
  cluster_now <- paste("cluster",deg_cluster_size$cluster[i],sep="")
  plot_title0_cluster <- paste(plot_title0, cluster_now, deg_cluster_size$size[i], sep=", ") #クラスターの個数も表示(20191025)
  
  print(plot_title0_cluster)
  
  ppplotsize <- (as.integer(deg_cluster_size$size[i]/10) + 1) * 2 + 1
  
  cluster_plotsave <- deg_normcount %>% filter(cluster==deg_cluster_size$cluster[i]) %>%  ggplot(aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(angle = 45, hjust = 1), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle(plot_title0_cluster) + ylab("normalized count") 
  
  
  file_path <- paste(folder_name_plot_path, "normCount__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_",cluster_now,".pdf",sep="") 
  ggsave(plot=cluster_plotsave,file=file_path, width = 20, height = ppplotsize, dpi = 120, limitsize = FALSE)

}

#===============================#
### point ###
## まとめて ##

#-- 斜めのラベル --#

ppplotsize <- (as.integer(nrow(deg_cluster_list)/10) + 1) * 2 + 1

cluster_plotsave <- deg_normcount %>% ggplot(aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(angle = 45, hjust = 1), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle("3T3 Dox +-") + ylab("normalized count") 


file_path <- paste(folder_name_plot_path, "normCount__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_all_1.pdf",sep="") 
ggsave(plot=cluster_plotsave,file=file_path, width = 20, height = ppplotsize, dpi = 360, limitsize = FALSE)

#-- 横向きのラベル --#

ppplotsize <- ppplotsize * 1.25

cluster_plotsave <- deg_normcount %>% ggplot(aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",ncol=10)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=18,face="italic"), legend.position = "top",  plot.title=element_text(size=16)) +ggsci::scale_color_npg() + ggtitle("3T3 Dox +-") + ylab("normalized count") 

file_path <- paste(folder_name_plot_path, "normCount__", csvfilepath, "__LRTDEG_fdr0p1__final191205_last200811_all_2.pdf",sep="") 
ggsave(plot=cluster_plotsave,file=file_path, width = 25, height = ppplotsize, dpi = 360, limitsize = FALSE)


#--memo--#
#face="italic"
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合


```

使用するものをプロット

"Acta1","Myh3","Ckm","Tnnt2","Actb","Csrp3"

```{r normcount fig error var & boxplot 190722-1203 select7}

#======== Change every data ここで順番を変更 ========#

#-------#

nbl <- norm_plotlist_all %>% filter(ext_gene %in% c("Myh3","Ckm","Acta1","Tnnt2","Actb","Csrp3"))

#====================================================#

#f_gene_norm <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(normalized),se=sd(normalized)/sqrt(length(normalized))) %>% ungroup()

#----#
nbl %>% group_by(group, type, time) %>% summarize()
#----#

#face="italic"
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合

### point ###
gggggpp <-  ggplot(nbl,aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",2)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=20,face="italic"), legend.position = "top",  plot.title=element_text(size=16))+ggsci::scale_color_npg()  + ylab("normalized count")

file_path <- paste("normCount__", csvfilepath, "__with_point__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 10)

print(gggggpp)


gggggpp <-  ggplot(nbl,aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",2)+geom_smooth(se=FALSE)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=20,face="italic"), legend.position = "top",  plot.title=element_text(size=16))+ggsci::scale_color_npg()  + ylab("normalized count")

file_path <- paste("normCount__", csvfilepath, "__with_point__final191205_last200811_smooth.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 10)


```

"Acta1","Myh3","Nsdhl"

```{r paper fig normcount select2}
#20191204 作成

nbl2 <- norm_plotlist_all %>% filter(ext_gene %in% c("Acta1","Myh3","Nsdhl"))

#%>% mutate(ext_gene=factor(ext_gene, c("Acta1","Nsdhl","Myh3")))

#===============================#

### point ###
gggggpp <-  ggplot(nbl2,aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y")+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24,face="italic"), legend.position = "top",  plot.title=element_text(size=20)) +ggsci::scale_color_npg()  + ggtitle("3T3 Dox +-")  + ylab("normalized count")

file_path <- paste("normCount__", csvfilepath, "__with_point__Acta1_Myh3_Nsdhl__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 6) #2つ図なら width = 8
print(gggggpp)


```


```{r paper fig normcount select3, fig.width=12, fig.height=5}
#20191204 作成

rrres_LRT %>% filter(ext_gene %in% c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))

nbl3 <- norm_plotlist_all %>% filter(ext_gene %in% c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))  %>% mutate(ext_gene=factor(ext_gene,c("Slc38a2","Inhba","Acta1","Myog","Myh9","Rpl13"))) 

#%>% mutate(ext_gene=factor(ext_gene, c("Acta1","Nsdhl","Myh3")))

#===============================#

### point ###
gggggpp <-  ggplot(nbl3,aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",nrow=1)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=20), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=20),axis.title.x = element_blank(), legend.title=element_text(size=20), legend.text = element_text(size=20), strip.background = element_blank(), strip.text.x = element_text(size=24,face="italic"), legend.position = "top",  plot.title=element_text(size=20)) +ggsci::scale_color_npg()  + ggtitle("3T3 Dox +-")  + ylab("normalized count")

file_path <- paste("normCount__", csvfilepath, "__with_point__Cluste_Rep__final191205_last200811.pdf",sep="")
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 16, height = 5) #2つ図なら width = 8
print(gggggpp)

nbl3
```

ここまで実行


### GO解析 

####  クラスタリング の結果をGO
20191206修正

```{r GO Load list part2-1}
#20191206修正

file_path <- paste(folder_name_plot_path, "DEG_fdr0p1__", csvfilepath, "_kmeans4__kmeans_cluster.csv",sep="") 
print(file_path)

file_degcluster <- file_path

table_degcluster <- readr::read_csv(file_degcluster) %>% arrange(ens_gene) %>% dplyr::select(ens_gene,ext_gene,cluster)
table_degcluster %>% group_by(cluster) %>% summarise(size=n())

##### FDR setting ######
gofdr <- 0.1

cluster_num <- 4

```


```{r go clusterProfile part2-2}
# 20191206修正

library(clusterProfiler)
library(org.Mm.eg.db)

folder_path <- "./LRT/clusterProfile/"

#-------------#
file_path <- paste(folder_path, "DEG_fdr0p1__", csvfilepath, "_kmeans_BPfdr0p1_generatio",sep="")
filename_csv <- file_path

file_path <- paste(folder_path, "DEG_fdr0p1__", csvfilepath, "_kmeans_BPfdr0p1_generatio_cluster",sep="")
filename_list <- file_path

print(filename_list)
print(filename_csv)

#例 filename_list <- "./LRT/clusterProfile/H3mm18KO_mouseCTX_BRB0438_day5_2gunfdr0p2_kmeans_BPfdr0p1_generatio_cluster"
#例 filename_csv <- "./LRT/clusterProfile/H3mm18KO_mouseCTX_BRB0438_day5_2gunfdr0p2_kemans_BPfdr0p1_generatio"
#-------------#

cluster_list <- as.list(NA) #初期化

for (i in 1:cluster_num) {
   pre_list <- as.list(NA)
   pre_list <- table_degcluster %>% filter(cluster==as.double(i)) %>% dplyr::select(ens_gene) %>% as.list()
   names(pre_list) <- paste("ENSEMBL",as.character(i),sep="_")
 
   if (i == 1) { 
     cluster_list <- pre_list
   } 
   else cluster_list <- c(cluster_list, pre_list) 
}


for (i in 1:cluster_num) {
   print(paste(i, cluster_list[[i]] %>% tibble::enframe(name = NULL) %>% nrow(), sep=", "))
  
   pre_ego_BP <- enrichGO(gene = cluster_list[[i]],
                 OrgDb = "org.Mm.eg.db",
                 keyType = 'ENSEMBL',
                 ont = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff  = gofdr, qvalueCutoff  = 1.0) #20191211修正  pvalueCutoff  = fdr
   
   ## pvalue < qvalue < p.adjust ##
   # qvalueCutoff  = 0.3  qvalueCutoff  = 0.2 , qvalueCutoff  = 1.0

  
   if (i == 1) { 
     table_ego_BP <- data.frame(pre_ego_BP) %>% mutate(cluster=paste("cluster",as.character(i),sep=""))  # リスト型からデータフレームへ変換
   } 
   else table_ego_BP <- table_ego_BP %>% bind_rows(data.frame(pre_ego_BP) %>% mutate(cluster=paste("cluster",as.character(i),sep="")))
   
   #---- plot ---#
   BPplot <- dotplot(pre_ego_BP, showCategory=30, orderBy = "Count") #clusterProfile の機能で図を描く(191106修正) wrong orderBy parameter; set to default `orderBy = "x"`
   print(BPplot)
   ggsave(BPplot,file=paste(filename_list,as.character(i),".png",sep=""), width = 8, height = 12, dpi = 120)
   
   BPplot <- dotplot(pre_ego_BP, showCategory=10, orderBy = "Count") #clusterProfile の機能で図を描く(191106修正) wrong orderBy parameter; set to default `orderBy = "x"`
   print(BPplot)
   ggsave(BPplot,file=paste(filename_list,as.character(i),"_Category10.png",sep=""), width = 8, height = 4, dpi = 120)
   
   BPplot <- dotplot(pre_ego_BP, showCategory=5, orderBy = "Count") #clusterProfile の機能で図を描く(191106修正) wrong orderBy parameter; set to default `orderBy = "x"`
   print(BPplot)
   ggsave(BPplot,file=paste(filename_list,as.character(i),"_Category5.png",sep=""), width = 8, height = 3, dpi = 120)
}

print(table_ego_BP %>% group_by(cluster) %>% summarize())

#------#
# データはtable_ego_BPに。


```

```{r go clusterProfile part2-3}
# 20191206修正
#------------------------------------------------------#
# テーブルを保存
# table_ego_BP_3t3_LRT2 <- table_ego_BP

table_ego_BP1 <- table_ego_BP %>% mutate(cluster=factor(cluster,c("cluster1","cluster2","cluster3","cluster4"))) %>% arrange(cluster,desc(Count)) #191106

readr::write_csv(table_ego_BP1,paste(filename_csv,".csv",sep=""))

# 先のテーブルのgeneIDをgene nameに置換する。(20191025)

tablego <- table_ego_BP1 %>% mutate(gene_name=geneID) %>% dplyr::select(-(qvalue))

for (i in 1:nrow(table_degcluster)) {
  tablego <- tablego %>% mutate(gene_name=gsub(gene_name, pattern=table_degcluster$ens_gene[i], replacement=table_degcluster$ext_gene[i], ignore.case = TRUE))
}

print(tablego)

readr::write_csv(tablego,paste(filename_csv,"_genename.csv",sep=""))

#------------------------------------------------------#

#GOのtermの数
print(tablego %>% group_by(cluster) %>% summarize(cluster_3t3Dox_num = dplyr::n()))

## 変更 ##
table_ego_BP_2gunfdr0p2_cluster <- tablego

#--- メモ ----#
#tableggg <- table_ego_clustercluster
#colm <- tableggg$geneID
#for (i in 1:88) {
#  colm <- sub(rrres_cluster3$ens_gene[i], rrres_cluster3$ext_gene[i], colm)
#}
#print(colm)

```

```{r clusterProfile part2-4}
# 20191206, 191211修正
# Benjamini correction を p-adjust として使用する

file_path <- paste(folder_path, "DEG_fdr0p1__", csvfilepath, "_kmeans_BPfdr0p1.pdf",sep="")
file_BP_plot <- file_path

file_path <- paste(folder_path, "DEG_fdr0p1__", csvfilepath, "_kmeans__BPfdr0p1_muscleonly.pdf",sep="")
file_BP_plot_muscle <- file_path

print(file_BP_plot)
print(file_BP_plot_muscle)

# 例 file_BP_plot <- "./2gun/clusterProfile/BPfdr0p1__H3mm18KO_mouseCTX_BRB0438_day5_2gunfdr0p2_kmeans.pdf"
# 例 file_BP_plot_muscle <- "./2gun/clusterProfile/BPfdr0p1__H3mm18KO_mouseCTX_BRB0438_day5_2gunfdr0p2_kmeans_muscleonly.pdf"

#--------------------#

BP_matome <- tablego

rowlength <- BP_matome %>% group_by(Description) %>% summarize() %>% nrow()
BP_plot <- BP_matome %>% filter(p.adjust<gofdr) %>% ggplot(aes(x=cluster, y=reorder(Description,Count), size=Count, fill=p.adjust)) + geom_point(shape = 21) + theme(strip.text = element_text(size = 10)) + theme_minimal() + theme(strip.text = element_text(size = 12),axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_gradient(low = "red" , high = "blue")+ xlab("3T3 Dox +-") + ylab("GO Description (BP)") + labs(fill=paste("p.adjust (BH) < ",as.character(gofdr),sep=""))
print(BP_plot)
ggsave(plot=BP_plot,file=file_BP_plot, width = 10, height = (5+rowlength/6), dpi = 120,limitsize = FALSE)

#---muscle関連のみ
BP_matome_muscle <- BP_matome %>% filter(grepl("muscle", Description)|grepl("myo", Description))
rowlength <- BP_matome_muscle %>% group_by(Description) %>% summarize() %>% nrow()
BP_plot_muscle <- BP_matome_muscle %>% filter(p.adjust<gofdr) %>% ggplot(aes(x=cluster, y=reorder(Description,Count), size=Count, fill=p.adjust)) + geom_point(shape = 21) + theme(strip.text = element_text(size = 10)) + theme_minimal() + theme(strip.text = element_text(size = 12),axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_gradient(low = "red" , high = "blue") + xlab("3T3 Dox +- (muscle)") + ylab("GO Description (BP)") + labs(fill=paste("p.adjust (BH) < ",as.character(gofdr),sep=""))
print(BP_plot_muscle)
ggsave(BP_plot_muscle,file=file_BP_plot_muscle, width = 8, height = (5+rowlength/6), dpi = 120,limitsize = FALSE)


```


```{r wite result table clusterProfile part2-5}
# 20191206修正
# 20191003追加, 191120変更

#----- countのみのtable --------------#
aaa_tablego <- tablego %>% dplyr::select(ID,Description,cluster,Count) %>% spread(key=cluster,value = Count,fill=0) %>% mutate(Total= cluster2 + cluster3 +cluster4)  %>% arrange(desc(Total))

#aaa_tablego <- tablego %>% dplyr::select(ID,Description,cluster,Count) %>% spread(key=cluster,value = Count,fill=0) %>% mutate(Total= cluster1 +cluster2 + cluster3 +cluster4)  %>% arrange(desc(Total))

file_path <- paste(folder_path, "DEG_fdr0p1__", csvfilepath, "_kmeans__BPfdr0p1__count_table.csv",sep="")
print(file_path)
readr::write_csv(aaa_tablego,file_path)

#例 "./2gun/clusterProfile/BPfdr0p1__H3mmKO_mouseCTX_BRB0438_2gunfdr0p2_kemans__count_table.csv"
#-------------------------------------#

```

```{R memo part2-6}
# x=pvalue, y=p.adjust
plottt <- table_ego_BP %>% ggplot(aes(x=pvalue, y=p.adjust, size=Count)) + geom_point()+geom_abline(intercept=0,slope=1.0,linetype="dashed",colour="blue") + xlim(0,NA) + ylim(0,NA) + ggtitle(label=paste("p.adjust (BH) < ",as.character(gofdr),sep=""))
print(plottt)

# x=pvalue, y=qvalue
plottt <- table_ego_BP %>% ggplot(aes(x=pvalue, y=qvalue, size=Count)) + geom_point()+geom_abline(intercept=0,slope=1.0,linetype="dashed",colour="blue") + xlim(0,NA) + ylim(0,NA) + ggtitle(label=paste("p.adjust (BH) < ",as.character(gofdr),sep=""))
print(plottt)

# x=p.adjust, y=qvalue
plottt <- table_ego_BP %>% ggplot(aes(x=p.adjust, y=qvalue, size=Count)) + geom_point()+geom_abline(intercept=0,slope=1.0,linetype="dashed",colour="blue") + xlim(0,NA) + ylim(0,NA) + ggtitle(label=paste("p.adjust (BH) < ",as.character(gofdr),sep=""))
print(plottt)


## pvalue < qvalue < p.adjust ##
# pvalue < p.adjust
# pvalue < qvalue
# qvalue < p.adjust

#---------------------#

#[BBRB-seq_0438_QC_tmpl_v6_noumi_190515-H3mm18KO_CTX_S2-Day0_S3_fdr0p2ver_and_LRT_191024  (umi補正なし,fdr0.2) (TPM 190722ver) (190924を元に) (190627-1024)]を参考にした。

#pvalueCutoff	
#pvalue cutoff on enrichment tests to report

#pAdjustMethod	
#one of "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"

#qvalueCutoff	
#qvalue cutoff on enrichment tests to report as significant. Tests must pass i) pvalueCutoff on unadjusted pvalues, ii) pvalueCutoff on adjusted pvalues and iii) qvalueCutoff on qvalues to be reported.

# 設定(pvalueCutoff  = 0.1, qvalueCutoff  = 0.2)だと、p値<0.1, p.adjust値<0.1, q値<0.2 になっている。

```

GOここまで

この後に以前はfantom5のデータのコードが入っていたが、カット。
mouse CTX (BRB 0438) の結果との比較もカット



